<html lang="zh-x-mtfrom-en" class="translated-ltr"><head><script>
/*
 * Page-load timing bootstrap (minified; code below is left untouched).
 *
 * Timer (minified name `e`): constructor taking an optional start time.
 *   - this.t maps a tick label to [timestamp, referenceLabel].
 *   - tick(label, referenceLabel, time) stores `time`, or the current
 *     (new Date).getTime() when `time` is null/undefined; in that second case
 *     it also calls window.console.timeStamp("CSI/" + label), ignoring errors.
 *   - The constructor immediately records a "start" tick at the given time.
 *
 * Globals written:
 *   - window.jstiming = {Timer, load}, where `load` is a Timer started at
 *     performance.timing.responseStart when that value is > 0, else "now".
 *   - window.jstiming.srt = responseStart - navigationStart, only when
 *     navigationStart > 0 and responseStart >= navigationStart.
 *   - window.jstiming.pt = page time, taken from the first available of
 *     window.chrome.csi().pageT (floored), window.gtbExternal.pageT(), or
 *     window.external.pageT (read as a property, not called).
 *
 * Ticks added to `load` under the same navigationStart/responseStart guard:
 *   "_wtsrt" at navigationStart, "wtsrt_" at responseStart, "tbsd_" at now;
 *   plus "_tbnd" (at the probe's startE) and "tbnd_" (at navigationStart)
 *   when the chrome.csi or window.external probe is used.
 *
 * The whole page-time probe runs inside one try/catch that swallows every
 * error, so a throwing probe skips all later probes and leaves `pt` unset.
 */
(function(){(function(){function e(a){this.t={};this.tick=function(a,c,b){this.t[a]=[void 0!=b?b:(new Date).getTime(),c];if(void 0==b)try{window.console.timeStamp("CSI/"+a)}catch(h){}};this.tick("start",null,a)}var a;if(window.performance)var d=(a=window.performance.timing)&&a.responseStart;var f=0<d?new e(d):new e;window.jstiming={Timer:e,load:f};if(a){var c=a.navigationStart;0<c&&d>=c&&(window.jstiming.srt=d-c)}if(a){var b=window.jstiming.load;0<c&&d>=c&&(b.tick("_wtsrt",void 0,c),b.tick("wtsrt_","_wtsrt",
d),b.tick("tbsd_","wtsrt_"))}try{a=null,window.chrome&&window.chrome.csi&&(a=Math.floor(window.chrome.csi().pageT),b&&0<c&&(b.tick("_tbnd",void 0,window.chrome.csi().startE),b.tick("tbnd_","_tbnd",c))),null==a&&window.gtbExternal&&(a=window.gtbExternal.pageT()),null==a&&window.external&&(a=window.external.pageT,b&&0<c&&(b.tick("_tbnd",void 0,window.external.startE),b.tick("tbnd_","_tbnd",c))),a&&(window.jstiming.pt=a)}catch(g){}})();}).call(window);
</script><script src="https://translate.googleusercontent.com/translate/releases/twsfe_w_20180206_RC00/r/js/translate_c.js"></script><script>_intlStrings._originalText = "英语原文:";_intlStrings._interfaceDirection="ltr";_intlStrings._interfaceAlign="left";_intlStrings._langpair="en|zh-CN";_intlStrings._feedbackUrl="https://translate.google.com/translate_suggestion";_intlStrings._currentBy="当前的翻译是由%2$s在 %1$s完成的";_intlStrings._unknown="佚名译者";_intlStrings._suggestTranslation="更好的翻译建议"  ;_intlStrings._submit="提供建议";_intlStrings._suggestThanks="感谢您为 Google 翻译提供翻译建议。";_intlStrings._reverse=false;_intlStrings._staticContentPath="https://www.gstatic.com/translate/infowindow/";</script><style type="text/css">.google-src-text {display: none !important} .google-src-active-text {display: block!important;color:black!important; font-size:12px!important;font-family:arial,sans-serif!important}.google-src-active-text a {font-size:12px!important}.google-src-active-text a:link {color:#00c!important;text-decoration:underline!important}.google-src-active-text a:visited {color:purple!important;text-decoration:underline!important}.google-src-active-text a:active {color:red!important;text-decoration:underline!important}</style><meta http-equiv="X-Translated-By" content="Google"><link href="强化学习+机械臂-Learning to Control a Low-Cost Manipulator using.pdf" hreflang="en" rel="alternate machine-translated-from"><base href="" target="_top"><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta name="Title" content="Learning to Control a Low-Cost Manipulator using Data-Efficient Reinforcement Learning"><meta name="CreationDate" content="D:20101201120000"><meta name="Title" content="Learning to Control a Low-Cost Manipulator using Data-Efficient Reinforcement Learning"><meta name="Creator" content="LaTeX with hyperref package"><meta name="Producer" content="pdfTeX-1.40.10"><meta name="ModDate" content="D:20110531215009-07'00'"><meta name="Fullbanner" content="This is pdfTeX, Version 
3.1415926-1.40.10-2.2 (TeX Live 2009/Debian) kpathsea version 5.0.0"><title>学习使用数据有效的强化学习来控制低成本的机器人</title><link type="text/css" rel="stylesheet" charset="UTF-8" href="https://translate.googleapis.com/translate_static/css/translateelement.css"><script type="text/javascript" charset="UTF-8" src="https://translate.googleapis.com/translate_static/js/element/main_zh-CN.js"></script><script type="text/javascript" charset="UTF-8" src="https://translate.googleapis.com/element/TE_20170911_00/e/js/element/element_main.js"></script><link rel="stylesheet" type="text/css" href="https://www.gstatic.com/translate/infowindow/iw_sprite.css"></head><body bgcolor="#ffffff" vlink="blue" link="blue"><iframe src="https://translate.google.com/translate_un?hl=zh-CN&amp;prev=_t&amp;sl=en&amp;tl=zh-CN&amp;lang=en&amp;usg=ALkJrhh3Ks0Y3dsi0nEukcuohymuV5KGIw" width="0" height="0" frameborder="0" style="width:0px;height:0px;border:0px;display:none;"></iframe><table border="0" width="100%"><tbody><tr><td bgcolor="eeeeee" align="right"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><font face="arial,sans-serif"><a name="1"><b>Page 1</b></a></font></span> <font face="arial,sans-serif"><a name="1"><b>第1页</b></a></font></span> </td></tr></tbody></table><div style="position:absolute;top:268;left:81"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Learning to Control a Low-Cost Manipulator using</nobr></span> <nobr>学习如何控制一个低成本的机器人</nobr></span> </div><div style="position:absolute;top:310;left:174"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Data-Efficient Reinforcement Learning</nobr></span> <nobr>数据有效的强化学习</nobr></span> </div><div style="position:absolute;top:370;left:149"> <span 
class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Marc Peter Deisenroth</nobr></span> <nobr>Marc Peter Deisenroth</nobr></span> </div><div style="position:absolute;top:390;left:94"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Dept. of Computer Science &amp; Engineering</nobr></span> <nobr>计算机科学与工程系</nobr></span> </div><div style="position:absolute;top:408;left:147"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>University of Washington</nobr></span> <nobr>华盛顿大学</nobr></span> </div><div style="position:absolute;top:427;left:169"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Seattle, WA, USA</nobr></span> <nobr>西雅图，华盛顿州，美国</nobr></span> </div><div style="position:absolute;top:370;left:377"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Carl Edward Rasmussen</nobr></span> <nobr>卡尔·爱德华·拉斯穆森</nobr></span> </div><div style="position:absolute;top:390;left:395"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Dept. 
of Engineering</nobr></span> <nobr>工程系</nobr></span> </div><div style="position:absolute;top:408;left:383"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>University of Cambridge</nobr></span> <nobr>剑桥大学</nobr></span> </div><div style="position:absolute;top:427;left:411"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Cambridge, UK</nobr></span> <nobr>英国剑桥</nobr></span> </div><div style="position:absolute;top:370;left:657"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Dieter Fox</nobr></span> <nobr>迪特·福克斯</nobr></span> </div><div style="position:absolute;top:390;left:562"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Dept. 
of Computer Science &amp; Engineering</nobr></span> <nobr>计算机科学与工程系</nobr></span> </div><div style="position:absolute;top:408;left:615"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>University of Washington</nobr></span> <nobr>华盛顿大学</nobr></span> </div><div style="position:absolute;top:427;left:637"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Seattle, WA, USA</nobr></span> <nobr>西雅图，华盛顿州，美国</nobr></span> </div><div style="position:absolute;top:498;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Abstract—Over the last years, there has been substantial</nobr></span> <nobr>摘要 - 在过去的几年中，已经有了相当大的</nobr></span> </div><div style="position:absolute;top:513;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>progress in robust manipulation in unstructured environments.</nobr></span> <nobr>在非结构化环境中进行强大的操作。</nobr></span> </div><div style="position:absolute;top:528;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The long-term goal of our work is to get away from precise,</nobr></span> <nobr>我们工作的长远目标是远离精确，</nobr></span> </div><div style="position:absolute;top:543;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>but very expensive robotic systems and to develop affordable,</nobr></span> <nobr>但非常昂贵的机器人系统和开发负担得起的，</nobr></span> </div><div style="position:absolute;top:558;left:73"> <span class="notranslate" 
onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>potentially imprecise, self-adaptive manipulator systems that can</nobr></span> <nobr>可能不精确的自适应操纵系统</nobr></span> </div><div style="position:absolute;top:572;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>interactively perform tasks such as playing with children.</nobr></span> <nobr>交互式地执行诸如和孩子一起玩的任务。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In</nobr></span> <nobr>在</nobr></span> </div><div style="position:absolute;top:587;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>this paper, we demonstrate how a low-cost off-the-shelf robotic</nobr></span> <nobr>本文中，我们演示了一个低成本的现成机器人</nobr></span> </div><div style="position:absolute;top:602;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>system can learn closed-loop policies for a stacking task in only</nobr></span> <nobr>系统只能学习堆叠任务的闭环策略</nobr></span> </div><div style="position:absolute;top:617;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>a handful of trials—from scratch.</nobr></span> <nobr>从零开始进行一些试验。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Our manipulator is inaccurate</nobr></span> <nobr>我们的操纵者是不准确的</nobr></span> </div><div style="position:absolute;top:632;left:73"> <span class="notranslate" 
onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>and provides no pose feedback.</nobr></span> <nobr>并没有提供姿态反馈。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>For learning a controller in the</nobr></span> <nobr>为了学习控制器</nobr></span> </div><div style="position:absolute;top:647;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>work space of a Kinect-style depth camera, we use a model-based</nobr></span> <nobr>Kinect式深度相机的工作空间，我们使用基于模型的</nobr></span> </div><div style="position:absolute;top:662;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>reinforcement learning technique.</nobr></span> <nobr>强化学习技术。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Our learning method is data</nobr></span> <nobr>我们的学习方法是数据</nobr></span> </div><div style="position:absolute;top:677;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>efficient, reduces model bias, and deals with several noise sources</nobr></span> <nobr>高效，减少模型偏差，并处理多个噪声源</nobr></span> </div><div style="position:absolute;top:692;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>in a principled way during long-term planning.</nobr></span> <nobr>在长期规划中采取原则性的方式。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span 
class="google-src-text" style="direction: ltr; text-align: left"><nobr>We present a</nobr></span> <nobr>我们介绍一个</nobr></span> </div><div style="position:absolute;top:707;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>way of incorporating state-space constraints into the learning</nobr></span> <nobr>将状态空间约束纳入学习的方式</nobr></span> </div><div style="position:absolute;top:722;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>process and analyze the learning gain by exploiting the sequential</nobr></span> <nobr>利用顺序处理和分析学习收益</nobr></span> </div><div style="position:absolute;top:737;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>structure of the stacking task.</nobr></span> <nobr>堆叠任务的结构。</nobr></span> </div><div style="position:absolute;top:762;left:203"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>I. 
I</nobr> <font style="font-size:9px">NTRODUCTION</font></span> <nobr>一，</nobr> <font style="font-size:9px">引言</font></span> </div><div style="position:absolute;top:784;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Over the last years, there has been substantial progress in</nobr></span> <nobr>过去几年，在非结构化环境中的</nobr></span> </div><div style="position:absolute;top:802;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>robust manipulation in unstructured environments.</nobr></span> <nobr>鲁棒操作方面取得了实质性进展。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>While ex-</nobr></span> <nobr>虽然现</nobr></span> </div><div style="position:absolute;top:820;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>isting techniques have the potential to solve various household</nobr></span> <nobr>有技术有潜力解决各种家庭</nobr></span> </div><div style="position:absolute;top:838;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>manipulation tasks, they typically rely on extremely expensive</nobr></span> <nobr>操作任务，但它们通常依赖极其昂贵的</nobr></span> </div><div style="position:absolute;top:856;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>robot hardware [</nobr> <a href="#8">12]</a> .</span> <nobr>机器人硬件[</nobr> <a href="#8">12]</a> 。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span 
class="google-src-text" style="direction: ltr; text-align: left">The long-term goal of our work is to</span>我们工作的长远目标是</span> </div><div style="position:absolute;top:874;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>develop affordable, light-weight manipulator systems that can</nobr></span> <nobr>开发经济实惠的轻型机械手系统</nobr></span> </div><div style="position:absolute;top:892;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>interactively play with children.</nobr></span> <nobr>与孩子们互动玩耍。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>A key problem of cheap</nobr></span> <nobr>便宜的关键问题</nobr></span> </div><div style="position:absolute;top:910;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>manipulators, however, is their inaccuracy and the limited</nobr></span> <nobr>然而，操纵者是他们的不准确和有限的</nobr></span> </div><div style="position:absolute;top:928;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>sensor feedback, if any.</nobr></span> <nobr>传感器反馈，如果有的话。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In this paper, we show how to use a</nobr></span> <nobr>在本文中，我们将展示如何使用一个</nobr></span> </div><div style="position:absolute;top:946;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>cheap, 
off-the-shelf robotic manipulator ($370) and a Kinect-</nobr></span> <nobr>便宜的现成机器人操作器（370美元）和Kinect-</nobr></span> </div><div style="position:absolute;top:964;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>style</nobr> <a href="https://translate.google.com/translate?hl=zh-CN&amp;prev=_t&amp;sl=en&amp;tl=zh-CN&amp;u=http://www.xbox.com/kinect">(http://www.xbox.com/kinect</a> ) depth camera (&lt;$120) to</span> <nobr>风格</nobr> <a href="https://translate.google.com/translate?hl=zh-CN&amp;prev=_t&amp;sl=en&amp;tl=zh-CN&amp;u=http://www.xbox.com/kinect">（http://www.xbox.com/kinect</a> ）深度相机（&lt;$ 120）即可</span> </div><div style="position:absolute;top:982;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>learn a block stacking task [</nobr> <a href="#8">2,</a> 1] under state-space constraints.</span>在状态空间约束下<nobr>学习块堆栈任务[</nobr> <a href="#8">2,1</a> ]。</span> </div><div style="position:absolute;top:1000;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>We use data-efficient reinforcement learning (RL) to train a</nobr></span> <nobr>我们使用数据有效强化学习（RL）来训练一个</nobr></span> </div><div style="position:absolute;top:1018;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>controller directly in the work space of the depth camera.</nobr></span> <nobr>控制器直接在深度相机的工作空间中。</nobr></span> </div><div style="position:absolute;top:1035;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Fully autonomous RL methods typically require many 
trials</nobr></span> <nobr>完全自主的RL方法通常需要许多次试验</nobr></span> </div><div style="position:absolute;top:1053;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>to successfully solve a task (e.g., Q-learning), a good ini-</nobr></span> <nobr>才能成功解决一个任务（如Q学习），或者需要良好的初始</nobr></span> </div><div style="position:absolute;top:1071;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>tialization (e.g., by imitation</nobr> <a href="#8">[</a> 3]), or a deep understanding</span> <nobr>化（例如，通过模仿</nobr> <a href="#8">[</a> 3]），或者需要对系统</span> </div><div style="position:absolute;top:1089;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>of the system.</nobr></span> <nobr>的深入理解。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>If this knowledge is unavailable, due to the</nobr></span> <nobr>如果这些知识不可得（由于</nobr></span> </div><div style="position:absolute;top:1107;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>lack of understanding of complicated dynamics or because a</nobr></span> <nobr>缺乏对复杂动力学的理解，或者因为</nobr></span> </div><div style="position:absolute;top:1125;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>solution is simply not known, data-intensive learning methods</nobr></span> <nobr>解决方案根本未知），数据密集型的学习方法</nobr></span> </div><div style="position:absolute;top:1143;left:73"> <span class="notranslate" 
onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>are required.</nobr></span> <nobr>是必要的。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In a robotic system, however, many physical</nobr></span> <nobr>然而，在一个机器人系统中，许多物理的</nobr></span> </div><div style="position:absolute;top:1161;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>interactions with the environment are often infeasible and lead</nobr></span> <nobr>与环境的相互作用往往是不可行的，并导致</nobr></span> </div><div style="position:absolute;top:1179;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>to worn-out robots.</nobr></span> <nobr>破旧的机器人</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The more fragile a robotic system the more</nobr></span> <nobr>机器人系统越脆弱越多</nobr></span> </div><div style="position:absolute;top:1197;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>important data-efficient learning methods are.</nobr></span> <nobr>重要的数据有效的学习方法是。</nobr></span> </div><div style="position:absolute;top:1214;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>To sidestep these problems, we build on</nobr> <font style="font-size:9px">PILCO</font> (probabilis-</span> <nobr>为了避开这些问题，我们建立在</nobr> <font style="font-size:9px">PILCO</font> （概率 -</span> </div><div 
style="position:absolute;top:1232;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>tic inference for learning control), a data-efficient model-based</nobr></span> <nobr>抽象推理为学习控制），一个数据高效的模型为基础</nobr></span> </div><div style="position:absolute;top:1250;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(indirect) policy search method [</nobr> <a href="#8">7</a> ] that reduces model bias,</span> <nobr>（间接）政策搜索方法[</nobr> <a href="#8">7</a> ]，减少模型偏见，</span> </div><div style="position:absolute;top:702;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Fig. 1. Low-cost robotic arm by Lynxmotion [1</nobr> <a href="#8">]</a> performing a block stacking</span> <nobr>图1. 
Lynxmotion的低成本机器人手臂[1</nobr> <a href="#8">]</a>进行块堆叠</span> </div><div style="position:absolute;top:716;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>task.</nobr></span> <nobr>任务。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Since the manipulator does not provide any pose feedback, our system</nobr></span> <nobr>由于操纵器不提供任何姿态反馈，所以我们的系统</nobr></span> </div><div style="position:absolute;top:729;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>learns a controller directly in the task space using visual feedback from a</nobr></span> <nobr>直接在任务空间中学习一个控制器，使用视觉反馈</nobr></span> </div><div style="position:absolute;top:743;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Kinect-style depth camera.</nobr></span> <nobr>Kinect式深度相机。</nobr></span> </div><div style="position:absolute;top:770;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>a typical problem of model-based methods: P</nobr> <font style="font-size:9px">ILCO</font> employs</span> <nobr>一个典型的基于模型的方法问题：P</nobr> <font style="font-size:9px">ILCO</font>使用</span> </div><div style="position:absolute;top:788;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>a flexible probabilistic non-parametric Gaussian process (GP)</nobr></span> <nobr>灵活的概率非参数高斯过程（GP）</nobr></span> </div><div style="position:absolute;top:806;left:468"> <span 
class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>dynamics model and takes model uncertainty consistently</nobr></span> <nobr>动力学模型，并持续模型不确定性</nobr></span> </div><div style="position:absolute;top:824;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>into account during long-term planning.</nobr></span> <nobr>在长期规划中考虑到。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>P</nobr> <font style="font-size:9px">ILCO</font> learns good</span> <nobr>P</nobr> <font style="font-size:9px">ILCO</font>学习良好</span> </div><div style="position:absolute;top:842;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>controllers from scratch, ie, with random initializations;</nobr></span> <nobr>控制器从头开始，即随机初始化;</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>no</nobr></span> <nobr>没有</nobr></span> </div><div style="position:absolute;top:860;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>deep understanding of the system is required.</nobr></span> <nobr>系统的深入理解是必需的。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In this paper, we</nobr></span> <nobr>在本文中，我们</nobr></span> </div><div style="position:absolute;top:878;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span 
class="google-src-text" style="direction: ltr; text-align: left"><nobr>show how obstacle information provided by the depth camera</nobr></span> <nobr>显示深度相机提供的障碍信息</nobr></span> </div><div style="position:absolute;top:896;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>can be incorporated into</nobr> <font style="font-size:9px">PILCO</font> ’s planning and learning to</span> <nobr>可以纳入</nobr> <font style="font-size:9px">PILCO</font>的规划和学习</span> </div><div style="position:absolute;top:914;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>avoid collisions even during training, and how knowledge can</nobr></span> <nobr>甚至在训练期间避免碰撞，以及知识如何</nobr></span> </div><div style="position:absolute;top:932;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>be efficiently transferred across related tasks.</nobr></span> <nobr>有效地转移到相关的任务。</nobr></span> </div><div style="position:absolute;top:949;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The paper is structured as follows.</nobr></span> <nobr>本文结构如下。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>After discussing related</nobr></span> <nobr>经过讨论相关</nobr></span> </div><div style="position:absolute;top:967;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>work, we describe the task to be solved, the low-cost hardware</nobr></span> 
<nobr>工作，我们描述要解决的任务、所用的低成本硬件</nobr></span> </div><div style="position:absolute;top:985;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>used, and a basic tracking algorithm in Sec.</nobr></span> <nobr>以及一种基本的跟踪算法，见第</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#2">III</a> .</span> <a href="#2">III</a> 节。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">Sec.</span>第</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">IV sum-</span> IV 节</span> </div><div style="position:absolute;top:1003;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>marizes the</nobr> <font style="font-size:9px">PILCO</font> framework and details how we incorporate</span> <nobr>概述</nobr> <font style="font-size:9px">PILCO</font>框架，并详细说明我们如何将</span> </div><div style="position:absolute;top:1021;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>collision avoidance into long-term planning under uncertainty.</nobr></span> <nobr>避碰纳入不确定性下的长期规划。</nobr></span> </div><div style="position:absolute;top:1039;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Sec.</nobr></span> <nobr>第</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; 
text-align: left"><a href="#5">V</a> presents the experimental results.</span> <a href="#5">V</a> 节给出了实验结果。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">Sec.</span>第</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">VI concludes</span> VI 节以讨论总结</span> </div><div style="position:absolute;top:1057;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>the paper with a discussion.</nobr></span> <nobr>全文。</nobr></span> </div><div style="position:absolute;top:1084;left:592"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>II.</nobr></span> <nobr>II.</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>R</nobr> <font style="font-size:9px">ELATED</font> W <font style="font-size:9px">ORK</font></span> <nobr>相关工作</nobr></span> </div><div style="position:absolute;top:1107;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In [</nobr> <a href="#8">11</a> ], a model-free policy learning method is presented,</span> <nobr>在[</nobr> <a href="#8">11</a> ]中，提出了一种无模型的策略学习方法，</span> </div><div style="position:absolute;top:1125;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>which relies on rollouts sampled from the system.</nobr></span> 
<nobr>这依赖于从系统采样的卷展栏。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Even in a</nobr></span> <nobr>即使在一个</nobr></span> </div><div style="position:absolute;top:1143;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>simple task (mountain-car) with only two policy parameters,</nobr></span> <nobr>简单的任务（山车）只有两个政策参数，</nobr></span> </div><div style="position:absolute;top:1161;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>80 rollouts are required.</nobr></span> <nobr>需要80个推出。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>For more complicated tasks, the</nobr></span> <nobr>对于更复杂的任务，</nobr></span> </div><div style="position:absolute;top:1178;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>number of required rollouts quickly goes into the thousands.</nobr></span> <nobr>所需的部署数量迅速进入数千人。</nobr></span> </div><div style="position:absolute;top:1196;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#8">[5</a> ] propose a consistent learning-planning method in par-</span> <a href="#8">[5</a> ]提出一个一致的学习规划方法，</span> </div><div style="position:absolute;top:1214;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>tially observable domains.</nobr></span> <nobr>可观察的域。</nobr></span> <span 
class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>A compact model of a discrete</nobr></span> <nobr>紧凑的离散模型</nobr></span> </div><div style="position:absolute;top:1232;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>latent space is learned and used for control learning by means</nobr></span> <nobr>通过手段学习潜在空间并用于控制学习</nobr></span> </div><div style="position:absolute;top:1250;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>of point-based value iteration</nobr> <a href="#8">[16</a> ].</span> <nobr>基于点的值迭代</nobr> <a href="#8">[16</a> ]。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">The approach in [5] for</span>在[5]中的做法</span> </div><div style="position:absolute;top:980;left:256"><a href="#8" style="background-color:#0000ff;padding:13px 10px;"></a></div><div style="position:absolute;top:984;left:793"><a href="#2" style="background-color:#0000ff;padding:16px 18px;"></a></div><div style="position:absolute;top:1037;left:761"><a href="#7" style="background-color:#0000ff;padding:17px 19px;"></a></div><div style="position:absolute;top:1249;left:807"><a href="#8" style="background-color:#0000ff;padding:13px 10px;"></a></div><div style="position:absolute;top:1363;left:0"><hr><table border="0" width="100%"><tbody><tr><td bgcolor="eeeeee" align="right"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><font face="arial,sans-serif"><a name="2"><b>Page 2</b></a></font></span> <font face="arial,sans-serif"><a name="2"><b>第2页</b></a></font></span> 
</td></tr></tbody></table></div><div style="position:absolute;top:1452;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>learning a latent-space dynamics model requires thousands</nobr></span> <nobr>学习一个潜在的空间动力学模型需要数千个</nobr></span> </div><div style="position:absolute;top:1470;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>of trajectories.</nobr></span> <nobr>的轨迹。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Furthermore, it does not naturally deal with</nobr></span> <nobr>而且，它自然不会处理</nobr></span> </div><div style="position:absolute;top:1488;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>continuous (latent) domains or model uncertainty.</nobr></span> <nobr>连续（潜伏）域或模型不确定性。</nobr></span> </div><div style="position:absolute;top:1506;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In recent years, GP dynamics models were more often used</nobr></span> <nobr>近年来，GP动力学模型更常用</nobr></span> </div><div style="position:absolute;top:1524;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>for learning robot dynamics [</nobr> <a href="#8">9,</a> 10, 14].</span> <nobr>用于学习机器人动力学[</nobr> <a href="#8">9，</a> 10，14]。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">However, they are 
usu-</span>但是，它们通常</span> </div><div style="position:absolute;top:1542;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>ally not used for long-term planning and policy learning, but</nobr></span> <nobr>不用于长期规划和策略学习，而是</nobr></span> </div><div style="position:absolute;top:1560;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>rather for myopic control and trajectory following.</nobr></span> <nobr>用于短视控制和轨迹跟踪。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Typically,</nobr></span> <nobr>通常情况下，</nobr></span> </div><div style="position:absolute;top:1577;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>the training data for the GP dynamics models are obtained</nobr></span> <nobr>GP动力学模型的训练数据的获得方式</nobr></span> </div><div style="position:absolute;top:1595;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>either by motor babbling [</nobr> <a href="#8">9</a> ] or by demonstrations [14].</span> <nobr>要么是运动咿呀学语（motor babbling）[</nobr> <a href="#8">9</a> ]，要么是示范[14]。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">For</span>对于</span> </div><div style="position:absolute;top:1613;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>the purpose of data-efficient fully autonomous learning, these</nobr></span> 
<nobr>数据高效的完全自主学习这一目的而言，这些</nobr></span> </div><div style="position:absolute;top:1631;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>approaches are not suitable: Motor babbling is data-inefficient</nobr></span> <nobr>方法并不合适：运动咿呀学语（motor babbling）数据效率低，</nobr></span> </div><div style="position:absolute;top:1649;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>and does not guarantee good models along a good trajectory;</nobr></span> <nobr>并且不能保证沿良好轨迹得到良好的模型；</nobr></span> </div><div style="position:absolute;top:1667;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>demonstrations would contradict fully autonomous learning.</nobr></span> <nobr>而示范则与完全自主学习相矛盾。</nobr></span> </div><div style="position:absolute;top:1685;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Other algorithms that use GP dynamics models in an RL</nobr></span> <nobr>在RL中使用GP动力学模型的其他算法</nobr></span> </div><div style="position:absolute;top:1703;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>setup were proposed in [20</nobr> <a href="#8">,</a> 8].</span> <nobr>设置已在[20</nobr> <a href="#8">，</a> 8]中提出。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">In [20, 8], value function mod-</span>在[20,8]中，值函数模型</span> </div><div style="position:absolute;top:1721;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span 
class="google-src-text" style="direction: ltr; text-align: left"><nobr>els have to be maintained, which becomes difficult in higher-</nobr></span> <nobr>需要维护，这在更高</nobr></span> </div><div style="position:absolute;top:1739;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>dimensional state spaces.</nobr></span> <nobr>维的状态空间中会变得困难。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Although the approaches in [</nobr> <a href="#8">20,</a> 8]</span> <nobr>虽然方法[</nobr> <a href="#8">20，</a> 8]</span> </div><div style="position:absolute;top:1757;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>do long-term planning for finding a policy, they cannot directly</nobr></span> <nobr>为寻找策略而进行长期规划，但它们不能直接</nobr></span> </div><div style="position:absolute;top:1775;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>deal with constraints in the state space (eg, obstacles).</nobr></span> <nobr>处理状态空间中的约束（例如障碍物）。</nobr></span> </div><div style="position:absolute;top:1793;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Model-based RL methods are typically better suited for</nobr></span> <nobr>与无模型方法相比，基于模型的RL方法通常更适合</nobr></span> </div><div style="position:absolute;top:1811;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>data-efficient learning than model-free methods.</nobr></span> <nobr>数据高效的学习。</nobr></span> 
<span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>However, they</nobr></span> <nobr>但是，他们</nobr></span> </div><div style="position:absolute;top:1829;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>often employ a certainty equivalence assumption [</nobr> <a href="#8">22,</a> 23, 4]</span> <nobr>往往采用确定性等价假设[</nobr> <a href="#8">22,23,4</a> ]</span> </div><div style="position:absolute;top:1847;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>by assuming that the learned model is a good approxima-</nobr></span> <nobr>通过假设学习模型是一个好的近似值，</nobr></span> </div><div style="position:absolute;top:1865;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>tion of the latent system dynamics.</nobr></span> <nobr>潜在的系统动力学。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>This assumption leads</nobr></span> <nobr>这个假设导致</nobr></span> </div><div style="position:absolute;top:1883;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>to “model bias”, which often makes learning from scratch</nobr></span> <nobr>到“模式偏见”，这往往使得从头开始学习</nobr></span> </div><div style="position:absolute;top:1901;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>“daunting” [</nobr> <a href="#8">22]</a> , especially when only a few samples from 
the</span> <nobr>“令人畏惧”[</nobr> <a href="#8">22]</a> ，尤其是当只有几个样本来自</span> </div><div style="position:absolute;top:1919;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>system are available.</nobr></span> <nobr>系统可用。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Reducing model bias requires accounting</nobr></span> <nobr>减少模型偏差需要会计</nobr></span> </div><div style="position:absolute;top:1936;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>for model uncertainty during planning [</nobr> <a href="#8">23]</a> .</span> <nobr>在规划中模型的不确定性[</nobr> <a href="#8">23]</a> 。</span> </div><div style="position:absolute;top:1955;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Unlike most other model-based approaches,</nobr> <font style="font-size:9px">PILCO</font> [ <a href="#8">6</a> , 7]</span> <nobr>与大多数其他基于模型的方法不同，</nobr> <font style="font-size:9px">PILCO</font> [ <a href="#8">6,7</a> ]</span> </div><div style="position:absolute;top:1972;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>does not make a certainty equivalence assumption on the</nobr></span> <nobr>并没有把确定性等价假设放在这个上</nobr></span> </div><div style="position:absolute;top:1990;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>learned model or simply take the maximum likelihood model.</nobr></span> <nobr>学习模型或简单地采取最大似然模型。</nobr></span> 
</div><div style="position:absolute;top:2008;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Instead, it learns a probabilistic dynamics model and explicitly</nobr></span> <nobr>相反，它学习一个概率动力学模型，并明确地</nobr></span> </div><div style="position:absolute;top:2026;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>incorporates model uncertainty into long-term planning [</nobr> <a href="#8">7]</a> .</span> <nobr>将模型的不确定性纳入长期规划[</nobr> <a href="#8">7]</a> 。</span> </div><div style="position:absolute;top:2044;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Unlike [</nobr> <a href="#8">23,</a> 4, 20, 8], P <font style="font-size:9px">ILCO</font> , however, neither requires sam-</span> <nobr>然而，与[</nobr> <a href="#8">23，</a> 4，20，8]不同，P <font style="font-size:9px">ILCO</font>既不需要采样</span> </div><div style="position:absolute;top:2062;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>pling methods for planning, nor needs to maintain an explicit</nobr></span> <nobr>方法来进行规划，也不需要维护显式的</nobr></span> </div><div style="position:absolute;top:2080;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>value function model.</nobr></span> <nobr>价值函数模型。</nobr></span> </div><div style="position:absolute;top:2098;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In [</nobr> <a href="#8">18</a> ], the authors also aim at developing 
low-cost ma-</span> <nobr>在[</nobr> <a href="#8">18</a> ]中，作者也旨在开发低成本的ma-</span> </div><div style="position:absolute;top:2116;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>nipulators.</nobr></span> <nobr>nipulators。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>However, while their focus is on building novel</nobr></span> <nobr>然而，他们的重点是建设小说</nobr></span> </div><div style="position:absolute;top:2134;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>manipulation hardware equipped with sufficient sensing, our</nobr></span> <nobr>操控硬件配备足够的感应，我们的</nobr></span> </div><div style="position:absolute;top:2152;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>goal is to develop reasoning algorithms to be used with cheap</nobr></span> <nobr>目标是开发推理算法，以便于使用</nobr></span> </div><div style="position:absolute;top:2170;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>off-the-shelf systems.</nobr></span> <nobr>现成的系统。</nobr></span> </div><div style="position:absolute;top:2199;left:197"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>III.</nobr></span> <nobr>III。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>P</nobr> <font style="font-size:9px">RELIMINARIES</font></span> <nobr>P</nobr> <font 
style="font-size:9px">RELIMINARIES</font></span> </div><div style="position:absolute;top:2223;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In this paper, we describe how a low-precision robotic</nobr></span> <nobr>在本文中，我们描述了一个低精度的机器人</nobr></span> </div><div style="position:absolute;top:2241;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>arm can learn to stack a tower of foam blocks—fully au-</nobr></span> <nobr>手臂可以学习堆叠一个泡沫块塔 - 完全au  - </nobr></span> </div><div style="position:absolute;top:2259;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>tonomously.</nobr></span> <nobr>tonomously。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>We employ the following assumptions: First,</nobr></span> <nobr>我们采用以下假设：首先，</nobr></span> </div><div style="position:absolute;top:2277;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>since grasping is not the focus of this work, we assume that the</nobr></span> <nobr>既然把握不是这项工作的重点，那么我们就假设这个</nobr></span> </div><div style="position:absolute;top:2294;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>block is placed in the robot's gripper.</nobr></span> <nobr>块被放置在机器人的抓手。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Second, the arm's 
joint</nobr></span> <nobr>其次，手臂的关节</nobr></span> </div><div style="position:absolute;top:2312;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>angles and velocities are not measured internally.</nobr></span> <nobr>角度和速度不是在内部测量的。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>However,</nobr></span> <nobr>然而，</nobr></span> </div><div style="position:absolute;top:2330;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>the location of the center of the block in the robot's gripper</nobr></span> <nobr>机器人手爪中心块的位置</nobr></span> </div><div style="position:absolute;top:2348;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>can be determined using the depth camera.</nobr></span> <nobr>可以使用深度相机来确定。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Third, no desired</nobr></span> <nobr>第三，不需要</nobr></span> </div><div style="position:absolute;top:2366;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>path/trajectory is a priori known.</nobr></span> <nobr>路径/轨迹是先验已知的。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>This also excludes human</nobr></span> <nobr>这也排除了人类</nobr></span> </div><div style="position:absolute;top:2384;left:73"> <span class="notranslate" 
onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>demonstrations.</nobr></span> <nobr>示范。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Fourth, we assume that the initial location and</nobr></span> <nobr>第四，我们假设初始位置和</nobr></span> </div><div style="position:absolute;top:2402;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>the target location of the block in the gripper are fixed.</nobr></span> <nobr>夹爪中方块的目标位置是固定的。</nobr></span> </div><div style="position:absolute;top:2420;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Trajectory-following methods such as Jacobian-transpose</nobr></span> <nobr>轨迹跟随方法，如雅可比转置</nobr></span> </div><div style="position:absolute;top:2438;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>control [</nobr> <a href="#8">13</a> ] are not suitable in our case: A desired trajectory</span> <nobr>控制[</nobr> <a href="#8">13</a> ]并不适合我们的情况：期望的轨迹</span> </div><div style="position:absolute;top:1452;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>is not known in advance.</nobr></span> <nobr>事先并不知道。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Simply following a straight path</nobr></span> <nobr>简单地遵循直线路径</nobr></span> </div><div style="position:absolute;top:1470;left:468"> <span 
class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>between the initial and the target state might not succeed</nobr></span> <nobr>初始状态和目标状态之间可能不会成功</nobr></span> </div><div style="position:absolute;top:1488;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>due to obstacles (eg, partial stack).</nobr></span> <nobr>由于障碍（例如，部分堆叠）。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>We furthermore have</nobr></span> <nobr>我们还有</nobr></span> </div><div style="position:absolute;top:1506;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>to cope with multiple sources of uncertainty: camera noise,</nobr></span> <nobr>以应对多种不确定性来源：相机噪声，</nobr></span> </div><div style="position:absolute;top:1524;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>time synchronization (camera/controller), idealized assump-</nobr></span> <nobr>时间同步（摄像机/控制器），理想化的假设 - </nobr></span> </div><div style="position:absolute;top:1541;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>tions (eg, constant duration between measurements), delays,</nobr></span> <nobr>（例如，测量之间的持续时间），延迟，</nobr></span> </div><div style="position:absolute;top:1559;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>image processing noise, and robot arm 
noise.</nobr></span> <nobr>图像处理噪声和机器人手臂噪声。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The camera</nobr></span> <nobr>相机</nobr></span> </div><div style="position:absolute;top:1577;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>noise and the robot arm noise are the major noise sources, see</nobr></span> <nobr>噪声和机器人手臂噪声是主要噪声源，请参阅</nobr></span> </div><div style="position:absolute;top:1595;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Sec.</nobr></span> <nobr>第</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#2">III-A</a> for details.</span> <a href="#2">III-A</a>节的详细说明。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">For long-term planning and controller</span>对于长期规划和控制器</span> </div><div style="position:absolute;top:1613;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>learning, all these uncertainties have to be taken into account.</nobr></span> <nobr>学习，所有这些不确定性都必须考虑在内。</nobr></span> </div><div style="position:absolute;top:1643;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>A. 
Hardware Description</nobr></span> <nobr>A.硬件描述</nobr></span> </div><div style="position:absolute;top:1667;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>We use a lightweight robotic arm by Lynxmotion [1</nobr> <a href="#8">]</a> , see</span> <nobr>我们使用Lynxmotion的轻量级机器人手臂[1</nobr> <a href="#8">]</a> ，参见</span> </div><div style="position:absolute;top:1685;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>also Fig.</nobr> <a href="#1">1</a> .</span> <nobr>也是图</nobr> <a href="#1">1</a> 。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">The arm costs approximately $370 and has six</span>手臂花费约370美元，有六个</span> </div><div style="position:absolute;top:1703;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>controllable degrees of freedom: base rotate, three joints,</nobr></span> <nobr>可控自由度：底座旋转，三关节，</nobr></span> </div><div style="position:absolute;top:1721;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>wrist rotate, and a gripper (open/close).</nobr></span> <nobr>手腕旋转，抓手（打开/关闭）。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The plastic arm can</nobr></span> <nobr>塑料手臂可以</nobr></span> </div><div style="position:absolute;top:1738;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>be 
controlled by commanding both a desired configuration</nobr></span> <nobr>通过控制所需的配置来控制</nobr></span> </div><div style="position:absolute;top:1756;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>of the six servos (via their pulse durations, which range</nobr></span> <nobr>在六个舵机（通过他们的脉冲持续时间，范围</nobr></span> </div><div style="position:absolute;top:1774;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>from 0.75 ms–2.25 ms) and the duration for executing the</nobr></span> <nobr>从0.75毫秒-2.25毫秒）和执行的时间</nobr></span> </div><div style="position:absolute;top:1792;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>command.</nobr></span> <nobr>命令。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The arm is very noisy: Tapping on the base makes</nobr></span> <nobr>手臂非常嘈杂：敲击基地使</nobr></span> </div><div style="position:absolute;top:1810;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>the end effector swing in a radius of about 2 cm.</nobr></span> <nobr>末端执行器摆动约2厘米的半径。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The system</nobr></span> <nobr>系统</nobr></span> </div><div style="position:absolute;top:1828;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>noise is especially pronounced 
when moving the arm vertically</nobr></span> <nobr>当垂直移动手臂时，噪音尤其明显</nobr></span> </div><div style="position:absolute;top:1846;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(up/down).</nobr></span> <nobr>（向上/向下）。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The robotic arm is shipped without any sensors.</nobr></span> <nobr>机器人手臂没有任何传感器。</nobr></span> </div><div style="position:absolute;top:1864;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Thus, neither the joint angles nor the configuration of the</nobr></span> <nobr>因此，无论是关节角度还是配置</nobr></span> </div><div style="position:absolute;top:1882;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>servos can be obtained directly.</nobr></span> <nobr>舵机可以直接获得。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Instead of equipping the robot</nobr></span> <nobr>而不是装备机器人</nobr></span> </div><div style="position:absolute;top:1900;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>with further sensors and/or markers, we demonstrate that good</nobr></span> <nobr>与进一步的传感器和/或标记，我们证明了这一点</nobr></span> </div><div style="position:absolute;top:1918;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>controllers can be learned without 
additional information.</nobr></span> <nobr>良好的控制器可以在没有附加信息的情况下学习得到。</nobr></span> </div><div style="position:absolute;top:1936;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>We use a PrimeSense depth camera [2]</nobr></span> <nobr>我们使用PrimeSense深度相机[2]</nobr></span> <nobr><a href="#8"> </a></nobr> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">for visual tracking.</span>进行视觉追踪。</span> </div><div style="position:absolute;top:1954;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The camera is identical to the Kinect sensor, providing a</nobr></span> <nobr>相机与Kinect传感器相同，提供了一个</nobr></span> </div><div style="position:absolute;top:1972;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>synchronized depth image and a 640×480 color (RGB) image</nobr></span> <nobr>同步深度图像和640×480彩色（RGB）图像</nobr></span> </div><div style="position:absolute;top:1990;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>at 30 Hz.</nobr></span> <nobr>频率为30赫兹。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Using structured light, the camera delivers useful</nobr></span> <nobr>使用结构光，相机提供有用的</nobr></span> </div><div style="position:absolute;top:2008;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>depth information of objects in a 
range of about 0.5m–5m.</nobr></span> <nobr>深度信息在0.5m-5m范围内。</nobr></span> </div><div style="position:absolute;top:2026;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The depth resolution is approximately 1 cm at 2m distance [</nobr> <a href="#8">2]</a> .</span> <nobr>深度分辨率在2米距离处约为1厘米[</nobr> <a href="#8">2]</a> 。</span> </div><div style="position:absolute;top:2044;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The total cost of the robot and the camera is about $500.</nobr></span> <nobr>机器人和相机的总成本大约是500美元。</nobr></span> </div><div style="position:absolute;top:2061;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>ROS [19</nobr> <a href="#8">]</a> handles the communication with the hardware.</span> <nobr>ROS [19</nobr> <a href="#8">]</a>处理与硬件的通信。</span> </div><div style="position:absolute;top:2092;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>B. 
Block Tracking</nobr></span> <nobr>B.块跟踪</nobr></span> </div><div style="position:absolute;top:2115;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>At every time step, the robot uses the center of the block</nobr></span> <nobr>在每个时间步，机器人使用块的中心</nobr></span> </div><div style="position:absolute;top:2133;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>in its gripper to compute a continuous-valued control signal</nobr></span> <nobr>在其夹持器中计算连续值的控制信号</nobr></span> </div><div style="position:absolute;top:2151;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>u ∈ R</nobr> <font style="font-size:8px">4</font> , which comprises the commanded pulse widths for</span> <nobr>u∈R</nobr> <font style="font-size:8px">4</font> ，它由指令脉冲宽度组成，对应于</span> </div><div style="position:absolute;top:2169;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>the first four servo motors.</nobr></span> <nobr>前四个伺服电机。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Wrist rotation and gripper opening/</nobr></span> <nobr>手腕旋转和抓手打开/</nobr></span> </div><div style="position:absolute;top:2187;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>closing are not learned.</nobr></span> <nobr>闭合不在学习范围内。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: 
ltr; text-align: left"><nobr>For tracking the block in the gripper of</nobr></span> <nobr>用于跟踪夹具中的块</nobr></span> </div><div style="position:absolute;top:2205;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>the robot arm, we use a simple but fast blob tracking algorithm.</nobr></span> <nobr>机器人手臂，我们使用一个简单但快速的斑点追踪算法。</nobr></span> </div><div style="position:absolute;top:2223;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>At the beginning of an experiment, the user marks the object in</nobr></span> <nobr>在实验开始时，用户将对象标记进去</nobr></span> </div><div style="position:absolute;top:2241;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>the gripper of the robot by clicking on it in a display.</nobr></span> <nobr>通过在显示器上点击机器人的抓手。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Assuming</nobr></span> <nobr>假设</nobr></span> </div><div style="position:absolute;top:2258;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>that the object has a uniform color, we use color-based region</nobr></span> <nobr>该对象具有统一的颜色，我们使用基于颜色的区域</nobr></span> </div><div style="position:absolute;top:2276;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>growing starting at the clicked pixel to estimate the extent and</nobr></span> <nobr>从点击像素开始增长以估计范围</nobr></span> </div><div style="position:absolute;top:2294;left:468"> 
<span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>3D center of the object, which is used as the state x ∈ R</nobr> <font style="font-size:8px">3</font> by</span> <nobr>和对象的3D中心，该中心被用作状态x∈R</nobr> <font style="font-size:8px">3</font> ，供</span> </div><div style="position:absolute;top:2312;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>the RL algorithm.</nobr></span> <nobr>RL算法使用。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Finding the 3D center of the block requires</nobr></span> <nobr>寻找块的3D中心需要</nobr></span> </div><div style="position:absolute;top:2330;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>less than 0.02 s per frame.</nobr></span> <nobr>每帧少于0.02秒。</nobr></span> </div><div style="position:absolute;top:2360;left:470"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>IV.</nobr></span> <nobr>IV.</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>P</nobr> <font style="font-size:9px">OLICY</font> L <font style="font-size:9px">EARNING WITH</font> S <font style="font-size:9px">TATE</font> -S <font style="font-size:9px">PACE</font> C <font style="font-size:9px">ONSTRAINTS</font></span> <nobr>带状态空间约束的策略学习</nobr></span> 
</div><div style="position:absolute;top:2384;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In the following, we summarize the</nobr> <font style="font-size:9px">PILCO</font> -framework [ <a href="#8">6,</a> 7]</span> <nobr>在下面，我们总结</nobr> <font style="font-size:9px">PILCO</font>框架[ <a href="#8">6,7</a> ]</span> </div><div style="position:absolute;top:2402;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>for learning a good closed-loop policy (state-feedback con-</nobr></span> <nobr>为了学习一个好的闭环策略（状态反馈控制</nobr></span> </div><div style="position:absolute;top:2420;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>troller) π : R</nobr> <font style="font-size:8px">3</font> → R <font style="font-size:8px">4</font> , x ↦→ u.</span> <nobr>器）π：R</nobr> <font style="font-size:8px">3</font> →R <font style="font-size:8px">4</font> ，x↦u。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">Here, x is called the state</span>在这里，x被称为状态</span> </div><div style="position:absolute;top:2438;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>defined as the coordinates of the center (x</nobr> <font style="font-size:8px">c</font> ,y <font style="font-size:8px">c</font> ,z <font style="font-size:8px">c</font> ) of the block</span> <nobr>定义为块的中心坐标（x</nobr> <font style="font-size:8px">c</font> ，y <font style="font-size:8px">c</font> ，z <font 
style="font-size:8px">c</font> ）</span> </div><div style="position:absolute;top:1522;left:262"><a href="#8" style="background-color:#0000ff;padding:14px 17px;"></a></div><div style="position:absolute;top:1522;left:284"><a href="#8" style="background-color:#0000ff;padding:13px 18px;"></a></div><div style="position:absolute;top:1594;left:398"><a href="#8" style="background-color:#0000ff;padding:13px 18px;"></a></div><div style="position:absolute;top:1702;left:243"><a href="#8" style="background-color:#0000ff;padding:13px 10px;"></a></div><div style="position:absolute;top:1702;left:284"><a href="#8" style="background-color:#0000ff;padding:13px 18px;"></a></div><div style="position:absolute;top:1702;left:307"><a href="#8" style="background-color:#0000ff;padding:13px 10px;"></a></div><div style="position:absolute;top:1737;left:436"><a href="#8" style="background-color:#0000ff;padding:14px 11px;"></a></div><div style="position:absolute;top:1827;left:411"><a href="#8" style="background-color:#0000ff;padding:14px 18px;"></a></div><div style="position:absolute;top:1827;left:436"><a href="#8" style="background-color:#0000ff;padding:13px 11px;"></a></div><div style="position:absolute;top:1953;left:436"><a href="#8" style="background-color:#0000ff;padding:13px 11px;"></a></div><div style="position:absolute;top:2043;left:147"><a href="#8" style="background-color:#0000ff;padding:13px 11px;"></a></div><div style="position:absolute;top:2043;left:164"><a href="#8" style="background-color:#0000ff;padding:13px 18px;"></a></div><div style="position:absolute;top:2043;left:188"><a href="#8" style="background-color:#0000ff;padding:13px 11px;"></a></div><div style="position:absolute;top:2383;left:831"><a href="#8" style="background-color:#0000ff;padding:13px 10px;"></a></div><div style="position:absolute;top:2551;left:0"><hr><table border="0" width="100%"><tbody><tr><td bgcolor="eeeeee" align="right"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span 
class="google-src-text" style="direction: ltr; text-align: left"><font face="arial,sans-serif"><a name="3"><b>Page 3</b></a></font></span> <font face="arial,sans-serif"><a name="3"><b>第3页</b></a></font></span> </td></tr></tbody></table></div><div style="position:absolute;top:2636;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Algorithm 1</nobr> <font style="font-size:9px">PILCO</font></span> <nobr>算法1</nobr> <font style="font-size:9px">PILCO</font></span> </div><div style="position:absolute;top:2658;left:82"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>1:</nobr> <font style="font-size:12px">init: Set controller parameters ψ to random.</font></span> <nobr>1：</nobr> <font style="font-size:12px">init：将控制器参数ψ设置为随机值。</font></span> </div><div style="position:absolute;top:2676;left:82"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>2:</nobr> <font style="font-size:12px">Apply random control signals and record data.</font></span> <nobr>2：</nobr> <font style="font-size:12px">应用随机控制信号并记录数据。</font></span> </div><div style="position:absolute;top:2694;left:82"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>3:</nobr> <font style="font-size:12px">repeat</font></span> <nobr>3：</nobr> <font style="font-size:12px">重复</font></span> </div><div style="position:absolute;top:2712;left:82"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>4:</nobr></span> <nobr>4：</nobr></span> </div><div style="position:absolute;top:2710;left:121"> <span 
class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Learn probabilistic GP dynamics model using all data</nobr></span> <nobr>学习使用所有数据的概率性GP动力学模型</nobr></span> </div><div style="position:absolute;top:2730;left:82"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>5:</nobr></span> <nobr>5：</nobr></span> </div><div style="position:absolute;top:2728;left:121"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>repeat</nobr></span> <nobr>重复</nobr></span> </div><div style="position:absolute;top:2728;left:274"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>⊳ Model-based policy search</nobr></span> <nobr>⊳基于模型的策略搜索</nobr></span> </div><div style="position:absolute;top:2748;left:82"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>6:</nobr></span> <nobr>6：</nobr></span> </div><div style="position:absolute;top:2746;left:144"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Approx.</nobr></span> <nobr>约。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>inference for policy evaluation: get J</nobr> <font style="font-size:8px">π</font> (ψ)</span> <nobr>推导政策评估：得到</nobr> <font style="font-size:8px">Jπ</font> （ψ）</span> </div><div style="position:absolute;top:2766;left:82"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span 
class="google-src-text" style="direction: ltr; text-align: left"><nobr>7:</nobr></span> <nobr>7：</nobr></span> </div><div style="position:absolute;top:2764;left:144"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Gradients dJ</nobr> <font style="font-size:8px">π</font> (ψ)/ dψ for policy improvement</span> <nobr>用于策略改进的梯度dJ</nobr> <font style="font-size:8px">π</font> (ψ)/dψ</span> </div><div style="position:absolute;top:2784;left:82"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>8:</nobr></span> <nobr>8：</nobr></span> </div><div style="position:absolute;top:2782;left:144"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Update parameters ψ (eg, CG or L-BFGS).</nobr></span> <nobr>更新参数ψ（例如CG或L-BFGS）。</nobr></span> </div><div style="position:absolute;top:2802;left:82"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>9:</nobr></span> <nobr>9：</nobr></span> </div><div style="position:absolute;top:2800;left:121"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>until convergence;</nobr></span> <nobr>直至收敛;</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>return ψ</nobr></span> <nobr>返回ψ</nobr></span> </div><div style="position:absolute;top:2795;left:298"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∗</nobr></span> <nobr>*</nobr></span> 
</div><div style="position:absolute;top:2820;left:76"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>10:</nobr></span> <nobr>10：</nobr></span> </div><div style="position:absolute;top:2818;left:121"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Set π</nobr> <font style="font-size:8px">∗</font> ← π(ψ <font style="font-size:8px">∗</font></span> <nobr>设π</nobr> <font style="font-size:8px">*</font> ←π（ψ <font style="font-size:8px">*</font></span> </div><div style="position:absolute;top:2817;left:218"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>).</nobr></span> <nobr>）。</nobr></span> </div><div style="position:absolute;top:2838;left:76"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>11:</nobr></span> <nobr>11：</nobr></span> </div><div style="position:absolute;top:2836;left:121"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Apply π</nobr> <font style="font-size:8px">∗</font> to robot (single trial/episode);</span> <nobr>将π</nobr> <font style="font-size:8px">*</font>应用于机器人（单次试验/回合）;</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">record data.</span>记录数据。</span> </div><div style="position:absolute;top:2856;left:76"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>12:</nobr> <font style="font-size:12px">until task learned</font></span> 
<nobr>12：</nobr> <font style="font-size:12px">直到任务学习</font></span> </div><div style="position:absolute;top:2910;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>in the gripper.</nobr></span> <nobr>在夹子里。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>We attempt to learn this policy from scratch,</nobr></span> <nobr>我们试图从头学习这个政策，</nobr></span> </div><div style="position:absolute;top:2928;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>ie, with only very general prior knowledge about the task and</nobr></span> <nobr>即只有非常一般的关于任务的先验知识</nobr></span> </div><div style="position:absolute;top:2945;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>the solution itself.</nobr></span> <nobr>解决方案本身。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Moreover, we want to find π in only a few</nobr></span> <nobr>而且，我们只想在几个中找到π</nobr></span> </div><div style="position:absolute;top:2963;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>trials, ie, we require a data-efficient learning method.</nobr></span> <nobr>试验，即我们需要一个数据有效的学习方法。</nobr></span> </div><div style="position:absolute;top:2982;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>As a criterion to judge the performance of a controller 
π,</nobr></span> <nobr>作为判断控制器π的性能的标准，</nobr></span> </div><div style="position:absolute;top:3000;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>we use the long-term expected return</nobr></span> <nobr>我们使用长期的预期回报</nobr></span> </div><div style="position:absolute;top:3033;left:185"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>J</nobr> <font style="font-size:8px">π</font> =</span> <nobr>J</nobr> <font style="font-size:8px">π</font> =</span> </div><div style="position:absolute;top:3029;left:223"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∑</nobr> <font style="font-size:8px">T</font></span> <nobr>∑</nobr> <font style="font-size:8px">T</font></span> </div><div style="position:absolute;top:3045;left:244"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>t=0</nobr></span> <nobr>t=0</nobr></span> </div><div style="position:absolute;top:3033;left:267"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>E</nobr> <font style="font-size:8px">x</font> <font style="font-size:5px">t</font> [c(x <font style="font-size:8px">t</font> )] ,</span> <nobr>E</nobr> <font style="font-size:8px">x</font> <font style="font-size:5px">t</font> [c(x <font style="font-size:8px">t</font> )] ，</span> </div><div style="position:absolute;top:3033;left:433"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(1)</nobr></span> <nobr>（1）</nobr></span> </div><div 
style="position:absolute;top:3065;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>of a trajectory (x</nobr> <font style="font-size:8px">0</font> ,..., x <font style="font-size:8px">T</font> ) when applying π.</span>当应用π时<nobr>轨迹（x</nobr> <font style="font-size:8px">0</font> ，...，x <font style="font-size:8px">T</font> ） <nobr>的轨迹</nobr> 。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">In Eq.</span>在等式</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">( <a href="#3">1)</a> ,</span> （ <a href="#3">1）</a> ，</span> </div><div style="position:absolute;top:3082;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>T is the prediction horizon and c(x</nobr> <font style="font-size:8px">t</font> ) is the instantaneous</span> <nobr>T是预测视界，c（x</nobr> <font style="font-size:8px">t</font> ）是瞬时的</span> </div><div style="position:absolute;top:3101;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>cost of being in state x at time t.</nobr></span> <nobr>在时间t处于状态x的成本。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>If not stated otherwise,</nobr></span> <nobr>如果没有另外说明，</nobr></span> </div><div style="position:absolute;top:3119;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>throughout this paper, we use a saturating 
cost function</nobr></span> <nobr>在本文中，我们使用饱和成本函数</nobr></span> </div><div style="position:absolute;top:3136;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>c = − exp(−d</nobr> <font style="font-size:8px">2</font> /σ <font style="font-size:8px">2</font></span> <nobr>c = − exp(−d</nobr> <font style="font-size:8px">2</font> /σ <font style="font-size:8px">2</font></span> </div><div style="position:absolute;top:3143;left:188"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>c</nobr> <font style="font-size:12px">) that penalizes Euclidean distances d of</font></span> <nobr>c</nobr> <font style="font-size:12px">)，它惩罚的是欧几里德距离d，即</font></span> </div><div style="position:absolute;top:3154;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>the block in the end effector from the target location x</nobr> <font style="font-size:8px">target</font> .</span> <nobr>末端执行器中的块到目标位置x</nobr> <font style="font-size:8px">target</font> 的距离。</span> </div><div style="position:absolute;top:3172;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>We assume the policy π is parametrized by ψ.</nobr></span> <nobr>我们假设策略π由ψ来参数化。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>P</nobr> <font style="font-size:9px">ILCO</font> learns</span> <nobr>P</nobr> <font style="font-size:9px">ILCO</font>学习</span> </div><div style="position:absolute;top:3190;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span 
class="google-src-text" style="direction: ltr; text-align: left"><nobr>a good parametrized policy by following Alg.</nobr></span> <nobr>遵循Alg是一个很好的参数化策略。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#3">1</a> [7].</span> <a href="#3">1</a> [7]。</span> </div><div style="position:absolute;top:3221;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>A. Probabilistic Dynamics Model</nobr></span> <nobr>A.概率动力学模型</nobr></span> </div><div style="position:absolute;top:3244;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>To avoid certainty equivalence assumptions on the learned</nobr></span> <nobr>为了避免对学习的确定性等值假设</nobr></span> </div><div style="position:absolute;top:3262;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>model,</nobr> <font style="font-size:9px">PILCO</font> takes model uncertainties into account during</span> <nobr>模型中，</nobr> <font style="font-size:9px">PILCO在</font>考虑模型不确定性的情况下，</span> </div><div style="position:absolute;top:3280;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>planning.</nobr></span> <nobr>规划。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Hence, a (posterior) distribution over plausible dy-</nobr></span> <nobr>因此，一个（后验）分布在可信的dy-</nobr></span> </div><div style="position:absolute;top:3298;left:73"> <span class="notranslate" onmouseover="_tipon(this)" 
onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>namics models is required.</nobr></span> <nobr>namics模型是必需的。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>We use GPs [21</nobr> <a href="#8">]</a> to infer this</span> <nobr>我们使用GPs [21</nobr> <a href="#8">]</a>来推断这一点</span> </div><div style="position:absolute;top:3316;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>posterior distribution from currently available experience.</nobr></span> <nobr>后验分布从现有的经验。</nobr></span> </div><div style="position:absolute;top:3334;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Following [</nobr> <a href="#8">21]</a> , we briefly introduce the notation and stan-</span> <nobr>在[</nobr> <a href="#8">21]之后</a> ，我们简要地介绍符号和stan-</span> </div><div style="position:absolute;top:3352;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>dard prediction models for GPs, which are used to infer a</nobr></span> <nobr>预测GP的预测模型，用来推断一个</nobr></span> </div><div style="position:absolute;top:3370;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>distribution on a latent function f from noisy observations</nobr></span> <nobr>由噪声观测得到的潜在函数f上的分布</nobr></span> </div><div style="position:absolute;top:3388;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>y</nobr> <font 
style="font-size:8px">i</font> = f(x <font style="font-size:8px">i</font> )+ε, where in this paper, we consider ε ∼ N(0,σ <font style="font-size:8px">2</font></span> <nobr>y</nobr> <font style="font-size:8px">i</font> = f（x <font style="font-size:8px">i</font> ）+ε，其中在本文中，我们考虑ε ∼ N（0，σ <font style="font-size:8px">2</font></span> </div><div style="position:absolute;top:3395;left:437"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>ε</nobr> <font style="font-size:12px">)</font></span> <nobr>ε</nobr> <font style="font-size:12px">）</font></span> </div><div style="position:absolute;top:3406;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>iid system noise.</nobr></span> <nobr>iid系统噪音。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>A GP is completely specified by a mean</nobr></span> <nobr>GP是完全由平均值指定的</nobr></span> </div><div style="position:absolute;top:3424;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>function m( · ) and a positive semidefinite covariance function</nobr></span> <nobr>函数m（·）和一个半正定的协方差函数</nobr></span> </div><div style="position:absolute;top:3442;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>k( · , · ), also called a kernel.</nobr></span> <nobr>k（·，·），也称为内核。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Throughout this paper, we con-</nobr></span> <nobr>在整篇文章中，</nobr></span> 
</div><div style="position:absolute;top:3460;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>sider a prior mean function m ≡ 0 and the squared exponential</nobr></span> <nobr>先验平均函数m≡0和平方指数</nobr></span> </div><div style="position:absolute;top:3478;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(SE) kernel with automatic relevance determination defined as</nobr></span> <nobr>（SE）内核自动相关性确定定义为</nobr></span> </div><div style="position:absolute;top:3507;left:110"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>k(x, x ) = α</nobr> <font style="font-size:8px">2</font> exp ( − <font style="font-size:8px">1</font></span> <nobr><font style="font-size:8px">K（X，X）=α2</font></nobr> EXP（ - <font style="font-size:8px">1</font></span> </div><div style="position:absolute;top:3515;left:250"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>2</nobr></span> <nobr>2</nobr></span> </div><div style="position:absolute;top:3507;left:257"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(x − x ) Λ</nobr> <font style="font-size:8px">−1</font> (x − x )) .</span> <nobr>（x-x）Λ</nobr> <font style="font-size:8px">-1</font> （x-x））。</span> </div><div style="position:absolute;top:3507;left:433"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(2)</nobr></span> <nobr>（2）</nobr></span> </div><div style="position:absolute;top:3536;left:73"> <span 
class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Here, Λ := diag([l</nobr> <font style="font-size:8px">2</font></span> <nobr>这里，Λ：= diag（[l</nobr> <font style="font-size:8px">2</font></span> </div><div style="position:absolute;top:3543;left:196"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>1</nobr> <font style="font-size:12px">,...,l</font> 2</span> <nobr>1</nobr> <font style="font-size:12px">，...，l</font> 2</span> </div><div style="position:absolute;top:3543;left:242"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>D</nobr> <font style="font-size:12px">]) depends on the characteristic</font></span> <nobr>D</nobr> <font style="font-size:12px">]）取决于特性</font></span> </div><div style="position:absolute;top:3554;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>length-scales l</nobr> <font style="font-size:8px">i</font> , and α <font style="font-size:8px">2</font> is the variance of the latent function f.</span> <nobr>长度尺度L</nobr> <font style="font-size:8px"><font style="font-size:8px">I，α2</font></font>是潜函数f的方差。</span> </div><div style="position:absolute;top:3572;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Given n training inputs X = [x</nobr> <font style="font-size:8px">1</font> ,..., x <font style="font-size:8px">n</font> ] and corresponding</span> <nobr>给定n个训练输入X = [x</nobr> <font style="font-size:8px">1</font> ，...，x <font style="font-size:8px">n</font> ]和相应的</span> </div><div style="position:absolute;top:3590;left:73"> <span 
class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>training targets y = [y</nobr> <font style="font-size:8px">1</font> ,...,y <font style="font-size:8px">n</font> ] , the GP hyper-parameters</span> <nobr>训练目标y = [y</nobr> <font style="font-size:8px">1</font> ，...，y <font style="font-size:8px">n</font> ]，GP超参数</span> </div><div style="position:absolute;top:3608;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(length-scales l</nobr> <font style="font-size:8px">i</font> , signal variance α <font style="font-size:8px">2</font> , noise variance σ <font style="font-size:8px">2</font></span> <nobr>（长度尺度L</nobr> <font style="font-size:8px">I，</font>信号方差<font style="font-size:8px">α2，</font>噪声方差<font style="font-size:8px">σ2</font></span> </div><div style="position:absolute;top:3615;left:413"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>ε</nobr> <font style="font-size:12px">) are</font></span> <nobr>ε</nobr> <font style="font-size:12px">）是</font></span> </div><div style="position:absolute;top:3626;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>learned using evidence maximization [</nobr> <a href="#8">21]</a> .</span> <nobr>学习使用证据最大化[</nobr> <a href="#8">21]</a> 。</span> </div><div style="position:absolute;top:2640;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The posterior predictive distribution p(f</nobr> <font style="font-size:8px">∗</font> |x <font style="font-size:8px">∗</font> ) of the func-</span> <nobr>函数的后验预测分布p（f</nobr> 
<font style="font-size:8px">*</font> | x <font style="font-size:8px">*</font> ）</span> </div><div style="position:absolute;top:2658;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>tion value f</nobr> <font style="font-size:8px">∗</font> = f(x <font style="font-size:8px">∗</font> ) for an arbitrary, but known, test input</span>对于任意但已知的测试输入， <nobr>f</nobr> <font style="font-size:8px">*</font> = f（x <font style="font-size:8px">*</font> ）</span> </div><div style="position:absolute;top:2675;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>x</nobr> <font style="font-size:8px">∗</font> is Gaussian with mean and variance</span> <nobr>x</nobr> <font style="font-size:8px">*</font>是具有均值和方差的高斯</span> </div><div style="position:absolute;top:2703;left:487"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>m</nobr> <font style="font-size:8px">f</font> (x <font style="font-size:8px">∗</font> ) = E <font style="font-size:8px">f</font> [f <font style="font-size:8px">∗</font> ] = k <font style="font-size:8px">∗</font> (K + σ <font style="font-size:8px">2</font></span> <nobr>m</nobr> <font style="font-size:8px">f</font> （x <font style="font-size:8px">*</font> ）= E <font style="font-size:8px">f</font> [f <font style="font-size:8px">*</font> ] = k <font style="font-size:8px">*</font> （K + σ <font style="font-size:8px">2</font></span> </div><div style="position:absolute;top:2709;left:682"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>ε</nobr> <font style="font-size:12px">I)</font> −1 <font style="font-size:12px">y = k</font> ∗ <font style="font-size:12px">β 
,</font></span> <nobr>εI</nobr> <font style="font-size:12px">）</font> -1 <font style="font-size:12px">y = k</font> * <font style="font-size:12px">β，</font></span> </div><div style="position:absolute;top:2703;left:827"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(3)</nobr></span> <nobr>（3）</nobr></span> </div><div style="position:absolute;top:2726;left:482"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>σ</nobr> <font style="font-size:8px">2</font></span> <nobr><font style="font-size:8px">σ2</font></nobr></span> </div><div style="position:absolute;top:2733;left:491"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>f</nobr> <font style="font-size:12px">(x</font> ∗ <font style="font-size:12px">) = var</font> f <font style="font-size:12px">[f</font> ∗ <font style="font-size:12px">] = k</font> ∗∗ <font style="font-size:12px">− k</font> ∗ <font style="font-size:12px">(K + σ</font> 2</span> <nobr>f</nobr> <font style="font-size:12px">（x</font> * <font style="font-size:12px">）= var</font> f <font style="font-size:12px">[f</font> * <font style="font-size:12px">] = k</font> ** <font style="font-size:12px">-  k</font> * <font style="font-size:12px">（K +σ2</font></span> </div><div style="position:absolute;top:2733;left:721"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>ε</nobr> <font style="font-size:12px">I)</font> −1 <font style="font-size:12px">k</font> ∗ <font style="font-size:12px">+ σ</font> 2</span> <nobr>εI</nobr> <font style="font-size:12px">）</font> -1 <font style="font-size:12px">k</font> * <font style="font-size:12px">+σ2</font></span> 
</div><div style="position:absolute;top:2733;left:799"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>ε</nobr> <font style="font-size:12px">, (4)</font></span> <nobr>ε</nobr> <font style="font-size:12px">，（4）</font></span> </div><div style="position:absolute;top:2754;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>respectively, with k</nobr> <font style="font-size:8px">∗</font> := k(X, x <font style="font-size:8px">∗</font> ), k <font style="font-size:8px">∗∗</font> := k(x <font style="font-size:8px">∗</font> , x <font style="font-size:8px">∗</font> ), β :=</span> <nobr>分别用k</nobr> <font style="font-size:8px">*</font> ：= k（x，x <font style="font-size:8px">*</font> ），k <font style="font-size:8px">**</font> ：= k（x <font style="font-size:8px">*</font> ，x <font style="font-size:8px">*</font> ），β：=</span> </div><div style="position:absolute;top:2771;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(K+σ</nobr> <font style="font-size:8px">2</font></span> <nobr>（K <font style="font-size:8px">+σ2</font></nobr></span> </div><div style="position:absolute;top:2778;left:512"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>ε</nobr> <font style="font-size:12px">I)</font> −1 <font style="font-size:12px">y, and where K is the kernel matrix with entries</font></span> <nobr>εI</nobr> <font style="font-size:12px">）</font> -1 <font style="font-size:12px">y，其中K是具有条目的核矩阵</font></span> </div><div style="position:absolute;top:2789;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" 
style="direction: ltr; text-align: left"><nobr>K</nobr> <font style="font-size:8px">ij</font> = k(x <font style="font-size:8px">i</font> , x <font style="font-size:8px">j</font> ).</span> <nobr>K</nobr> <font style="font-size:8px">ij</font> = k（x <font style="font-size:8px">i</font> ，x <font style="font-size:8px">j</font> ）。</span> </div><div style="position:absolute;top:2807;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In our robotic system, see Sec.</nobr></span> <nobr>在我们的机器人系统中，参见第</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#2">III</a> , the GP models the</span> <a href="#2">III</a> 节，GP建模</span> </div><div style="position:absolute;top:2825;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>function f : R</nobr> <font style="font-size:8px">7</font> → R <font style="font-size:8px">3</font> , (x <font style="font-size:8px">t−1</font> , u <font style="font-size:8px">t−1</font> ) ↦→ ∆ <font style="font-size:8px">t</font> := x <font style="font-size:8px">t</font> −x <font style="font-size:8px">t−1</font> +ε <font style="font-size:8px">t</font> ,</span> <nobr>函数f : R</nobr> <font style="font-size:8px">7</font> → R <font style="font-size:8px">3</font> ，（x <font style="font-size:8px">t-1</font> ，u <font style="font-size:8px">t-1</font> ）↦ Δ <font style="font-size:8px">t</font> ：= x <font style="font-size:8px">t</font> −x <font style="font-size:8px">t-1</font> +ε <font style="font-size:8px">t</font> ，</span> </div><div style="position:absolute;top:2843;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>where ε</nobr> 
<font style="font-size:8px">t</font> ∈ R <font style="font-size:8px">3</font> is iid Gaussian system noise.</span> <nobr>其中，ε</nobr> <font style="font-size:8px">t</font> ∈R <font style="font-size:8px">3</font>是独立同分布的高斯系统噪声。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">The training</span>训练</span> </div><div style="position:absolute;top:2861;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>inputs and targets to the GP model are tuples (x</nobr> <font style="font-size:8px">t−1</font> , u <font style="font-size:8px">t−1</font> ),</span> <nobr>GP模型的输入和目标是元组（x</nobr> <font style="font-size:8px">t-1</font> ，u <font style="font-size:8px">t-1</font> ），</span> </div><div style="position:absolute;top:2879;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>and the corresponding differences ∆</nobr> <font style="font-size:8px">t</font> , respectively.</span> <nobr>和相应的差值Δ</nobr> <font style="font-size:8px">t</font> 。</span> </div><div style="position:absolute;top:2906;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>B. 
Long-Term Planning through Approximate Inference</nobr></span> <nobr>B.通过近似推断进行长期规划</nobr></span> </div><div style="position:absolute;top:2928;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Minimizing and evaluating J</nobr> <font style="font-size:8px">π</font> in Eq.</span> <nobr>最大限度地减少和</nobr>公式<nobr>评估<font style="font-size:8px">Ĵπ。</font></nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#3">(1</a> ) requires long-term</span> <a href="#3">（1</a> ）需要长期的</span> </div><div style="position:absolute;top:2946;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>predictions of the state evolution.</nobr></span> <nobr>对状态演化的预测。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>To obtain the state distribu-</nobr></span> <nobr>为了获得状态分配 - </nobr></span> </div><div style="position:absolute;top:2964;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>tions p(x</nobr> <font style="font-size:8px">1</font> ),...,p(x <font style="font-size:8px">T</font> ), we cascade one-step predictions.</span> <nobr>（x</nobr> <font style="font-size:8px">1</font> ），...，p（x <font style="font-size:8px">T</font> ），我们级联一步预测。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">This</span>这个</span> </div><div style="position:absolute;top:2982;left:468"> <span class="notranslate" onmouseover="_tipon(this)" 
onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>requires mapping uncertain (test) inputs through a GP model.</nobr></span> <nobr>需要通过GP模型映射不确定（测试）输入。</nobr></span> </div><div style="position:absolute;top:3000;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In the following, we assume that these test inputs are Gaussian</nobr></span> <nobr>在下面，我们假设这些测试输入是高斯的</nobr></span> </div><div style="position:absolute;top:3018;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>distributed and extend the results from [</nobr> <a href="#8">17,</a> 6, 7] to long-term</span> <nobr>将结果从[</nobr> <a href="#8">17，6，7</a> ] <nobr>分发并延伸</nobr>到长期</span> </div><div style="position:absolute;top:3036;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>planning in stochastic systems with control inputs.</nobr></span> <nobr>在具有控制输入的随机系统中进行规划。</nobr></span> </div><div style="position:absolute;top:3054;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>For predicting x</nobr> <font style="font-size:8px">t</font> from p(x <font style="font-size:8px">t−1</font> ), we require a joint distribu-</span> <nobr>为了</nobr>从p（x <font style="font-size:8px">t-1</font> ）中<nobr>预测x</nobr> <font style="font-size:8px">t</font> ，我们需要一个联合分配 -</span> </div><div style="position:absolute;top:3072;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>tion p(x</nobr> <font style="font-size:8px">t−1</font> , u 
<font style="font-size:8px">t−1</font> ).</span> <nobr>（x</nobr> <font style="font-size:8px">t-1</font> ，u <font style="font-size:8px">t-1</font> ）。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">To compute this distribution, we use that</span>为了计算这个分布，我们使用它</span> </div><div style="position:absolute;top:3089;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>u</nobr> <font style="font-size:8px">t−1</font> = π(x <font style="font-size:8px">t−1</font> ), ie, the control is a function of the state:</span> <nobr>u</nobr> <font style="font-size:8px">t-1</font> =π（x <font style="font-size:8px">t-1</font> ），即控制是状态的函数：</span> </div><div style="position:absolute;top:3107;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>We first compute the predictive control signal p(u</nobr> <font style="font-size:8px">t−1</font> ) and</span> <nobr>我们首先计算预测控制信号p（u</nobr> <font style="font-size:8px">t-1</font> ）和</span> </div><div style="position:absolute;top:3125;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>subsequently the cross-covariance cov[x</nobr> <font style="font-size:8px">t−1</font> , u <font style="font-size:8px">t−1</font> ].</span> <nobr>随后的协方差cov [x</nobr> <font style="font-size:8px">t-1</font> ，u <font style="font-size:8px">t-1</font> ]。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">Finally, we</span>最后，我们</span> </div><div style="position:absolute;top:3143;left:468"> <span class="notranslate" onmouseover="_tipon(this)" 
onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>approximate p(x</nobr> <font style="font-size:8px">t−1</font> , u <font style="font-size:8px">t−1</font> ) by a Gaussian distribution with the</span> <nobr>近似p（x</nobr> <font style="font-size:8px">t-1</font> ，u <font style="font-size:8px">t-1</font> ）用高斯分布与</span> </div><div style="position:absolute;top:3161;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>correct mean and covariance.</nobr></span> <nobr>正确的均值和协方差。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The computation depends on the</nobr></span> <nobr>计算取决于</nobr></span> </div><div style="position:absolute;top:3179;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>policy parametrization ψ of the policy π.</nobr></span> <nobr>政策的政策参数化ψ。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In this paper, we</nobr></span> <nobr>在本文中，我们</nobr></span> </div><div style="position:absolute;top:3197;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>assume u</nobr> <font style="font-size:8px">t−1</font> = π(x <font style="font-size:8px">t−1</font> ) = Ax <font style="font-size:8px">t−1</font> + b, with ψ = {A, b}.</span> <nobr>假设u</nobr> <font style="font-size:8px">t-1</font> =π（x <font style="font-size:8px">t-1</font> ）= Ax <font style="font-size:8px">t-1</font> + b，其中ψ= {A，b}。</span> </div><div style="position:absolute;top:3215;left:468"> <span 
class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>With p(x</nobr> <font style="font-size:8px">t−1</font> ) = N(x <font style="font-size:8px">t−1</font> | µ <font style="font-size:8px">t−1</font> , Σ <font style="font-size:8px">t−1</font> ), we obtain</span> <nobr>随着p（x</nobr> <font style="font-size:8px">t-1</font> ）= N（x <font style="font-size:8px">t-1</font> | μ <font style="font-size:8px">t-1</font> ，Σ <font style="font-size:8px">t-1</font> ），我们获得</span> </div><div style="position:absolute;top:3242;left:516"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>p(u</nobr> <font style="font-size:8px">t−1</font> ) = N(u <font style="font-size:8px">t−1</font> | µ <font style="font-size:8px">u</font> , Σ <font style="font-size:8px">u</font> ) ,</span> <nobr>p（u</nobr> <font style="font-size:8px">t-1</font> ）= N（u <font style="font-size:8px">t-1</font> | μ <font style="font-size:8px">u</font> ，Σ <font style="font-size:8px">u</font> ），</span> </div><div style="position:absolute;top:3265;left:547"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>µ</nobr> <font style="font-size:8px">u</font> = Aµ <font style="font-size:8px">t−1</font> + b , Σ <font style="font-size:8px">u</font> = AΣ <font style="font-size:8px">t−1</font> A ,</span> <nobr>μ</nobr> <font style="font-size:8px">u</font> = Aμ <font style="font-size:8px">t-1</font> + b，Σ <font style="font-size:8px">u</font> = AΣ <font style="font-size:8px">t-1</font> A，</span> </div><div style="position:absolute;top:3293;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>by applying standard results from linear-Gaussian 
models.</nobr></span> <nobr>通过应用线性高斯模型的标准结果。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In</nobr></span> <nobr>在</nobr></span> </div><div style="position:absolute;top:3310;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>this example, π is a linear function of x</nobr> <font style="font-size:8px">t−1</font> and, thus, the</span> <nobr>这个例子中，π是x</nobr> <font style="font-size:8px">t-1</font> <nobr>的线性函数</nobr> ，因此，</span> </div><div style="position:absolute;top:3328;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>desired joint distribution p(x</nobr> <font style="font-size:8px">t−1</font> , u <font style="font-size:8px">t−1</font> ) is exactly Gaussian</span> <nobr>期望的联合分布p（x</nobr> <font style="font-size:8px">t-1</font> ，u <font style="font-size:8px">t-1</font> ）恰好是高斯的</span> </div><div style="position:absolute;top:3346;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>and given by</nobr></span> <nobr>并由...给出</nobr></span> </div><div style="position:absolute;top:3379;left:515"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>N</nobr></span> <nobr>N</nobr></span> </div><div style="position:absolute;top:3369;left:532"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>([</nobr></span> <nobr>（[</nobr></span> </div><div style="position:absolute;top:3371;left:572"> <span class="notranslate" 
onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>µ</nobr> <font style="font-size:8px">t−1</font></span> <nobr>μt</nobr> <font style="font-size:8px">-1</font></span> </div><div style="position:absolute;top:3389;left:551"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Aµ</nobr> <font style="font-size:8px">t−1</font> + b</span> <nobr>Aμt</nobr> <font style="font-size:8px">-1</font> + b</span> </div><div style="position:absolute;top:3369;left:623"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>]</nobr></span> <nobr>]</nobr></span> </div><div style="position:absolute;top:3380;left:634"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>,</nobr></span> <nobr>，</nobr></span> </div><div style="position:absolute;top:3369;left:640"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>[ Σ</nobr> <font style="font-size:8px">t−1</font></span> <nobr>[Σt</nobr> <font style="font-size:8px">-1</font></span> </div><div style="position:absolute;top:3371;left:716"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Σ</nobr> <font style="font-size:8px">t−1</font> A</span> <nobr>Σt</nobr> <font style="font-size:8px">-1</font> A</span> </div><div style="position:absolute;top:3389;left:648"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>AΣ</nobr> <font style="font-size:8px">t−1</font></span> 
<nobr>AΣ</nobr> <font style="font-size:8px">t-1</font></span> </div><div style="position:absolute;top:3389;left:709"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>AΣ</nobr> <font style="font-size:8px">t−1</font> A</span> <nobr>AΣ</nobr> <font style="font-size:8px">t-1</font> A</span> </div><div style="position:absolute;top:3369;left:778"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>])</nobr></span> <nobr>]）</nobr></span> </div><div style="position:absolute;top:3381;left:827"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(5)</nobr></span> <nobr>（5）</nobr></span> </div><div style="position:absolute;top:3417;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>with the cross-covariance cov[x</nobr> <font style="font-size:8px">t−1</font> , u <font style="font-size:8px">t−1</font> ] = Σ <font style="font-size:8px">t−1</font> A .</span> <nobr>其互协方差为cov [x</nobr> <font style="font-size:8px">t-1</font> ，u <font style="font-size:8px">t-1</font> ] =Σ <font style="font-size:8px">t-1</font> A。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">For</span>对于</span> </div><div style="position:absolute;top:3435;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>many other interesting controller parametrizations, the mean</nobr></span> <nobr>许多其他有趣的控制器参数化，均值</nobr></span> </div><div style="position:absolute;top:3453;left:468"> <span 
class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>and covariance can be computed analytically [</nobr> <a href="#8">6]</a> , although</span> <nobr>协方差可以通过分析来计算[</nobr> <a href="#8">6]</a></span> </div><div style="position:absolute;top:3470;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>p(x</nobr> <font style="font-size:8px">t−1</font> , u <font style="font-size:8px">t−1</font> ) may no longer be exactly Gaussian.</span> <nobr>p（x</nobr> <font style="font-size:8px">t-1</font> ，u <font style="font-size:8px">t-1</font> ）可能不再是精确的高斯分布。</span> </div><div style="position:absolute;top:3488;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>From now on, we assume a joint Gaussian distribution</nobr></span> <nobr>从现在起，我们假设一个联合的高斯分布</nobr></span> </div><div style="position:absolute;top:3506;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>p(˜x</nobr> <font style="font-size:8px">t−1</font> ) = N(˜x <font style="font-size:8px">t−1</font> | ˜µ <font style="font-size:8px">t−1</font> , ˜Σ <font style="font-size:8px">t−1</font> ) at time t − 1, where we</span>在时间t <font style="font-size:8px">-1</font>处的<nobr>p（~</nobr> <font style="font-size:8px">x t-1</font> ）= N（〜x <font style="font-size:8px">t-1</font> |~μt <font style="font-size:8px">-1</font> ，〜Σt <font style="font-size:8px">-1</font> ）</span> </div><div style="position:absolute;top:3525;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>define ˜x := [xu ] and ˜µ and˜Σ are the 
respective mean</nobr></span> <nobr>定义~x：= [xu]，而μμ和ΣΣ是相应的平均值</nobr></span> </div><div style="position:absolute;top:3543;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>and covariance of this augmented variable.</nobr></span> <nobr>和这个增广变量的协方差。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>When predicting</nobr></span> <nobr>预测时</nobr></span> </div><div style="position:absolute;top:3576;left:520"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>p(∆</nobr> <font style="font-size:8px">t</font> ) =</span> <nobr>p（</nobr> <font style="font-size:8px">Δt</font> ）=</span> </div><div style="position:absolute;top:3565;left:576"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∫</nobr></span> <nobr>∫</nobr></span> </div><div style="position:absolute;top:3576;left:594"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>p(f(˜x</nobr> <font style="font-size:8px">t−1</font> )|˜x <font style="font-size:8px">t−1</font> )p(˜x <font style="font-size:8px">t−1</font> )d˜x <font style="font-size:8px">t−1</font> ,</span> <nobr>p（f（~</nobr> <font style="font-size:8px">x t-1</font> ）|〜x <font style="font-size:8px">t-1</font> ）p（~ <font style="font-size:8px">x t-1</font> ）d〜x <font style="font-size:8px">t-1</font> ，</span> </div><div style="position:absolute;top:3576;left:827"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(6)</nobr></span> 
<nobr>（6）</nobr></span> </div><div style="position:absolute;top:3608;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>we integrate out the random variable ˜x</nobr> <font style="font-size:8px">t−1</font> .</span> <nobr>我们整理出随机变量〜x</nobr> <font style="font-size:8px">t-1</font> 。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">The transition</span>过渡</span> </div><div style="position:absolute;top:3626;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>probability p(f(˜x</nobr> <font style="font-size:8px">t−1</font> )|˜x <font style="font-size:8px">t−1</font> ) is obtained from the posterior GP.</span>从后GP得到<nobr>概率p（f（~</nobr> <font style="font-size:8px">x t-1</font> ）|〜x <font style="font-size:8px">t-1</font> ）。</span> </div><div style="position:absolute;top:3189;left:377"><a href="#8" style="background-color:#0000ff;padding:13px 10px;"></a></div><div style="position:absolute;top:3016;left:735"><a href="#8" style="background-color:#0000ff;padding:14px 10px;"></a></div><div style="position:absolute;top:3017;left:751"><a href="#8" style="background-color:#0000ff;padding:13px 10px;"></a></div><div style="position:absolute;top:3739;left:0"><hr><table border="0" width="100%"><tbody><tr><td bgcolor="eeeeee" align="right"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><font face="arial,sans-serif"><a name="4"><b>Page 4</b></a></font></span> <font face="arial,sans-serif"><a name="4"><b>第4页</b></a></font></span> </td></tr></tbody></table></div><div style="position:absolute;top:3888;left:241"> <span class="notranslate" onmouseover="_tipon(this)" 
onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−1</nobr></span> <nobr>-1</nobr></span> </div><div style="position:absolute;top:3888;left:274"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−0.5</nobr></span> <nobr>-0.5</nobr></span> </div><div style="position:absolute;top:3888;left:316"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0</nobr></span> <nobr>0</nobr></span> </div><div style="position:absolute;top:3888;left:350"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0.5</nobr></span> <nobr>0.5</nobr></span> </div><div style="position:absolute;top:3888;left:388"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>1</nobr></span> <nobr>1</nobr></span> </div><div style="position:absolute;top:3881;left:238"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−1</nobr></span> <nobr>-1</nobr></span> </div><div style="position:absolute;top:3868;left:234"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−0.5</nobr></span> <nobr>-0.5</nobr></span> </div><div style="position:absolute;top:3855;left:242"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0</nobr></span> <nobr>0</nobr></span> </div><div style="position:absolute;top:3843;left:237"> <span class="notranslate" onmouseover="_tipon(this)" 
onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0.5</nobr></span> <nobr>0.5</nobr></span> </div><div style="position:absolute;top:3830;left:242"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>1</nobr></span> <nobr>1</nobr></span> </div><div style="position:absolute;top:3852;left:229"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>A t</nobr></span> <nobr>Δt</nobr></span> </div><div style="position:absolute;top:3927;left:241"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−1</nobr></span> <nobr>-1</nobr></span> </div><div style="position:absolute;top:3927;left:274"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−0.5</nobr></span> <nobr>-0.5</nobr></span> </div><div style="position:absolute;top:3927;left:316"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0</nobr></span> <nobr>0</nobr></span> </div><div style="position:absolute;top:3927;left:350"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0.5</nobr></span> <nobr>0.5</nobr></span> </div><div style="position:absolute;top:3927;left:388"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>1</nobr></span> <nobr>1</nobr></span> </div><div style="position:absolute;top:3924;left:242"> <span class="notranslate" onmouseover="_tipon(this)" 
onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0</nobr></span> <nobr>0</nobr></span> </div><div style="position:absolute;top:3910;left:242"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>1</nobr></span> <nobr>1</nobr></span> </div><div style="position:absolute;top:3932;left:306"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(xt−1,ut−1)</nobr></span> <nobr>（XT-1，UT-1）</nobr></span> </div><div style="position:absolute;top:3922;left:237"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>p(x</nobr></span> <nobr>P（X</nobr></span> </div><div style="position:absolute;top:3914;left:240"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>t−1</nobr></span> <nobr>T-1</nobr></span> </div><div style="position:absolute;top:3908;left:237"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>,u</nobr></span> <nobr>，U</nobr></span> </div><div style="position:absolute;top:3903;left:240"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>t−1</nobr></span> <nobr>T-1</nobr></span> </div><div style="position:absolute;top:3897;left:237"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>)</nobr></span> <nobr>）</nobr></span> </div><div style="position:absolute;top:3881;left:120"> <span class="notranslate" 
onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−1</nobr></span> <nobr>-1</nobr></span> </div><div style="position:absolute;top:3868;left:115"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−0.5</nobr></span> <nobr>-0.5</nobr></span> </div><div style="position:absolute;top:3855;left:123"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0</nobr></span> <nobr>0</nobr></span> </div><div style="position:absolute;top:3843;left:119"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0.5</nobr></span> <nobr>0.5</nobr></span> </div><div style="position:absolute;top:3830;left:123"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>1</nobr></span> <nobr>1</nobr></span> </div><div style="position:absolute;top:3888;left:218"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0</nobr></span> <nobr>0</nobr></span> </div><div style="position:absolute;top:3888;left:192"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0.5</nobr></span> <nobr>0.5</nobr></span> </div><div style="position:absolute;top:3888;left:172"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>1</nobr></span> <nobr>1</nobr></span> </div><div style="position:absolute;top:3888;left:146"> <span class="notranslate" 
onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>1.5</nobr></span> <nobr>1.5</nobr></span> </div><div style="position:absolute;top:3892;left:167"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>p(At)</nobr></span> <nobr>p（Δt）</nobr></span> </div><div style="position:absolute;top:3958;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Fig. 2.</nobr></span> <nobr>图2。</nobr></span> </div><div style="position:absolute;top:3958;left:133"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>GP prediction at an uncertain input.</nobr></span> <nobr>在不确定的输入下的GP预测。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The input distribution</nobr></span> <nobr>输入分布</nobr></span> </div><div style="position:absolute;top:3971;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>p(x</nobr> <font style="font-size:6px">t−1</font> , u <font style="font-size:6px">t−1</font> ) is assumed Gaussian (lower right).</span>假设<nobr>p（x</nobr> <font style="font-size:6px">t-1</font> ，u <font style="font-size:6px">t-1</font> ）为高斯（右下）。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">Propagating it through</span>将其传播通过</span> </div><div style="position:absolute;top:3985;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" 
style="direction: ltr; text-align: left"><nobr>the GP model (upper right) yields the shaded distribution p(∆</nobr> <font style="font-size:6px">t</font> ) in the upper</span> <nobr>GP模型（右上）产生上面的阴影分布p（</nobr> <font style="font-size:6px">Δt</font> ）</span> </div><div style="position:absolute;top:3998;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>left, which is approximated by a Gaussian with the exact mean and variance.</nobr></span> <nobr>左边，这是用高斯近似的精确的均值和方差。</nobr></span> </div><div style="position:absolute;top:4046;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Computing the exact predictive distribution in Eq.</nobr></span> <nobr>计算公式中的精确预测分布</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(6</nobr> <a href="#3">)</a> is analyt-</span> <nobr>（6</nobr> <a href="#3">）</a>是分析 -</span> </div><div style="position:absolute;top:4064;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>ically intractable.</nobr></span> <nobr>难以处理。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Thus, we approximate p(∆</nobr> <font style="font-size:8px">t</font> ) by a Gaussian</span> <nobr>因此，我们</nobr>用高斯<nobr>近似p（</nobr> <font style="font-size:8px">Δt</font> ）</span> </div><div style="position:absolute;top:4082;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>with the exact mean and variance (moment 
matching).</nobr></span> <nobr>其均值和方差是精确的（矩匹配）。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Fig.</nobr> <a href="#4">2</a></span> <nobr>图</nobr> <a href="#4">2</a></span> </div><div style="position:absolute;top:4100;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>illustrates the scenario.</nobr></span> <nobr>说明了情况。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Note that for computing the mean µ</nobr> <font style="font-size:8px">∆</font></span> <nobr>请注意，为了计算预测分布的均值μ</nobr> <font style="font-size:8px">Δ</font></span> </div><div style="position:absolute;top:4118;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>and the variance σ</nobr> <font style="font-size:8px">2</font></span> <nobr>和方差σ</nobr> <font style="font-size:8px">2</font></span> </div><div style="position:absolute;top:4125;left:184"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∆</nobr> <font style="font-size:12px">of the predictive distribution, the standard</font></span> <nobr>Δ</nobr> <font style="font-size:12px">，标准</font></span> </div><div style="position:absolute;top:4136;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>GP predictive distribution (see Eqs. 
(</nobr> <a href="#3">3)</a> and (4), respectively)</span> <nobr>GP预测分布（分别见公式（</nobr> <a href="#3">3）</a>和（4））</span> </div><div style="position:absolute;top:4154;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>does not suffice because ˜x</nobr> <font style="font-size:8px">t−1</font> is not given deterministically.</span> <nobr>这是不够的，因为〜x</nobr> <font style="font-size:8px">t-1</font>没有确定性地给出。</span> </div><div style="position:absolute;top:4172;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Assume the mean µ</nobr> <font style="font-size:8px">∆</font> and the covariance Σ <font style="font-size:8px">∆</font> of the</span> <nobr>假设平均<font style="font-size:8px">μΔ</font></nobr>和的协方差<font style="font-size:8px">ΣΔ</font></span> </div><div style="position:absolute;top:4190;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>predictive distribution p(∆</nobr> <font style="font-size:8px">t</font> ) are known.</span> <nobr>预测分布p（</nobr> <font style="font-size:8px">Δt</font> ）是已知的。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">Then, a Gaussian</span>然后，一个高斯</span> </div><div style="position:absolute;top:4207;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>approximation N(x</nobr> <font style="font-size:8px">t</font> | µ <font style="font-size:8px">t</font> , Σ <font style="font-size:8px">t</font> ) to the desired state distribution</span> <nobr>近似度N（X</nobr> <font style="font-size:8px">T</font> <font 
style="font-size:8px"><font style="font-size:8px">|μT，Σt）</font></font>输出到所需状态分布</span> </div><div style="position:absolute;top:4225;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>p(x</nobr> <font style="font-size:8px">t</font> ) has mean and covariance</span> <nobr>p（x</nobr> <font style="font-size:8px">t</font> ）具有均值和协方差</span> </div><div style="position:absolute;top:4253;left:95"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>µ</nobr> <font style="font-size:8px">t</font> = µ <font style="font-size:8px">t−1</font> + µ <font style="font-size:8px">∆</font></span> <nobr>μ</nobr> <font style="font-size:8px">t</font> = μ <font style="font-size:8px">t-1</font> + μ <font style="font-size:8px">Δ</font></span> </div><div style="position:absolute;top:4253;left:433"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(7)</nobr></span> <nobr>（7）</nobr></span> </div><div style="position:absolute;top:4275;left:94"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Σ</nobr> <font style="font-size:8px">t</font> = Σ <font style="font-size:8px">t−1</font> + Σ <font style="font-size:8px">∆</font> + cov[x <font style="font-size:8px">t−1</font> , ∆ <font style="font-size:8px">t</font> ] + cov[∆ <font style="font-size:8px">t</font> , x <font style="font-size:8px">t−1</font> ] , (8)</span> <nobr>Σ</nobr> <font style="font-size:8px">t</font> = Σ <font style="font-size:8px">t-1</font> + Σ <font style="font-size:8px">Δ</font> + cov [x <font style="font-size:8px">t-1</font> ，Δ <font style="font-size:8px">t</font> ] + cov [Δ <font style="font-size:8px">t</font> ，x <font 
style="font-size:8px">t-1</font> ]，（8）</span> </div><div style="position:absolute;top:4298;left:89"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>cov[x</nobr> <font style="font-size:8px">t−1</font> , ∆ <font style="font-size:8px">t</font> ] = cov[x <font style="font-size:8px">t−1</font> , u <font style="font-size:8px">t−1</font> ]Σ <font style="font-size:8px">−1</font></span> <nobr>cov [x</nobr> <font style="font-size:8px">t-1</font> ，Δ <font style="font-size:8px">t</font> ] = cov [x <font style="font-size:8px">t-1</font> ，u <font style="font-size:8px">t-1</font> ]Σ <font style="font-size:8px">-1</font></span> </div><div style="position:absolute;top:4305;left:302"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>u</nobr> <font style="font-size:12px">cov[u</font> t−1 <font style="font-size:12px">, ∆</font> t <font style="font-size:12px">] ,</font></span> <nobr>u</nobr> <font style="font-size:12px">cov [u</font> t-1 <font style="font-size:12px">，Δ</font> t <font style="font-size:12px">]，</font></span> </div><div style="position:absolute;top:4298;left:433"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(9)</nobr></span> <nobr>（9）</nobr></span> </div><div style="position:absolute;top:4326;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>respectively.</nobr></span> <nobr>分别。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The computation of the required cross-</nobr></span> <nobr>所需互协方差的计算</nobr></span> </div><div 
style="position:absolute;top:4344;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>covariances in Eq.</nobr></span> <nobr>（见式</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(</nobr> <a href="#4">9</a> ) depends on the policy parametrization,</span> <nobr>（</nobr> <a href="#4">9</a> ））取决于策略参数化，</span> </div><div style="position:absolute;top:4362;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>but can often be computed analytically.</nobr></span> <nobr>但通常可以解析地计算。</nobr></span> </div><div style="position:absolute;top:4380;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In the following, we compute the mean µ</nobr> <font style="font-size:8px">∆</font> and the variance</span> <nobr>在下面，我们计算预测分布p（Δ</nobr> <font style="font-size:8px">t</font> ）的均值μ <font style="font-size:8px">Δ</font> 和方差</span> </div><div style="position:absolute;top:4397;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>σ</nobr> <font style="font-size:8px">2</font></span> <nobr>σ</nobr> <font style="font-size:8px">2</font></span> </div><div style="position:absolute;top:4404;left:82"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∆</nobr> <font style="font-size:12px">of the predictive distribution p(∆</font> t <font style="font-size:12px">), see Eq.</font></span> <nobr>Δ</nobr> <font style="font-size:12px">，</font> <font 
style="font-size:12px">见方程</font></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><font style="font-size:12px"><a href="#3">(6</a> ).</font></span> <font style="font-size:12px"><a href="#3">（6</a> ）。</font></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><font style="font-size:12px">We focus</font></span> <font style="font-size:12px">我们专注</font></span> </div><div style="position:absolute;top:4415;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>on the univariate case and refer to [</nobr> <a href="#8">6]</a> for the multivariate case.</span> <nobr>于单变量的情况，多变量的情况请参考[</nobr> <a href="#8">6]</a>。</span> </div><div style="position:absolute;top:4433;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>1) Mean: Following the law of iterated expectations,</nobr></span> <nobr>1）均值：根据迭代期望定律，</nobr></span> </div><div style="position:absolute;top:4461;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>µ</nobr> <font style="font-size:8px">∆</font> = E <font style="font-size:8px">˜x</font> <font style="font-size:5px">t−1</font> [E <font style="font-size:8px">f</font> [f(˜x <font style="font-size:8px">t−1</font> )|˜x <font style="font-size:8px">t−1</font> ]] = E <font style="font-size:8px">x</font> <font style="font-size:5px">∗</font> [m <font style="font-size:8px">f</font> (˜x <font style="font-size:8px">t−1</font> )]</span> <nobr><font style="font-size:8px">μΔ=</font></nobr> <font style="font-size:8px">E〜X</font> <font 
style="font-size:5px">T-1</font> [E <font style="font-size:8px">F</font> [F（〜X <font style="font-size:8px">T-1）|〜X</font> <font style="font-size:8px">T-1]</font> = E <font style="font-size:8px">X</font> <font style="font-size:5px">* [M</font> <font style="font-size:8px">F（〜X</font> <font style="font-size:8px">T-1）]</font></span> </div><div style="position:absolute;top:4461;left:425"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(10)</nobr></span> <nobr>（10）</nobr></span> </div><div style="position:absolute;top:4491;left:112"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>=</nobr></span> <nobr>=</nobr></span> </div><div style="position:absolute;top:4480;left:128"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∫</nobr></span> <nobr>∫</nobr></span> </div><div style="position:absolute;top:4491;left:145"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>m</nobr> <font style="font-size:8px">f</font> (˜x <font style="font-size:8px">t−1</font> )N(˜x <font style="font-size:8px">t−1</font> | ˜µ <font style="font-size:8px">t−1</font> , ˜Σ <font style="font-size:8px">t−1</font> ) d˜x <font style="font-size:8px">t−1</font> = β q</span> <nobr>m</nobr> <font style="font-size:8px">f</font> （~ <font style="font-size:8px">x t-1</font> ）N（~ <font style="font-size:8px">x t-1</font> |~μt <font style="font-size:8px">-1</font> ，~Σt <font style="font-size:8px">-1</font> ）d〜x <font style="font-size:8px">t-1</font> =βq</span> </div><div style="position:absolute;top:4526;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span 
class="google-src-text" style="direction: ltr; text-align: left"><nobr>with q = [q</nobr> <font style="font-size:8px">1</font> ,...,q <font style="font-size:8px">n</font> ] and β = (K + σ <font style="font-size:8px">2</font></span> <nobr>其中q = [Q</nobr> <font style="font-size:8px">1，...，Q</font> <font style="font-size:8px">n]</font>和β=（K <font style="font-size:8px">+σ2</font></span> </div><div style="position:absolute;top:4533;left:327"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>ε</nobr> <font style="font-size:12px">I)</font> −1 <font style="font-size:12px">y.</font></span> <nobr>εI</nobr> <font style="font-size:12px">）</font> -1 <font style="font-size:12px">y。</font></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><font style="font-size:12px">The entries</font></span> <font style="font-size:12px">条目</font></span> </div><div style="position:absolute;top:4544;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>of q ∈ R</nobr> <font style="font-size:8px">n</font> are given as</span> <nobr>q∈R</nobr> <font style="font-size:8px">n</font>给出</span> </div><div style="position:absolute;top:4577;left:110"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>q</nobr> <font style="font-size:8px">i</font> =</span> <nobr>q</nobr> <font style="font-size:8px">i</font> =</span> </div><div style="position:absolute;top:4567;left:142"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∫</nobr></span> <nobr>∫</nobr></span> </div><div 
style="position:absolute;top:4577;left:159"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>k(x</nobr> <font style="font-size:8px">i</font> , x <font style="font-size:8px">∗</font> )N(˜x <font style="font-size:8px">t−1</font> | ˜µ <font style="font-size:8px">t−1</font> , ˜Σ <font style="font-size:8px">t−1</font> ) d˜x <font style="font-size:8px">t−1</font></span> <nobr>k（x</nobr> <font style="font-size:8px">i</font> ，x <font style="font-size:8px">*</font> ）N（~x <font style="font-size:8px">t-1</font> |~μt <font style="font-size:8px">-1</font> ，〜Σt <font style="font-size:8px">-1</font> ）d〜x <font style="font-size:8px">t-1</font></span> </div><div style="position:absolute;top:4617;left:126"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>=</nobr></span> <nobr>=</nobr></span> </div><div style="position:absolute;top:4610;left:144"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>α</nobr> <font style="font-size:5px">2</font> exp</span> <nobr><font style="font-size:5px">α2</font></nobr> EXP</span> </div><div style="position:absolute;top:4605;left:180"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(</nobr></span> <nobr>（</nobr></span> </div><div style="position:absolute;top:4610;left:186"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−</nobr></span> <nobr>-</nobr></span> </div><div style="position:absolute;top:4604;left:198"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: 
ltr; text-align: left"><nobr>1</nobr></span> <nobr>1</nobr></span> </div><div style="position:absolute;top:4615;left:198"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>2</nobr></span> <nobr>2</nobr></span> </div><div style="position:absolute;top:4610;left:205"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(x</nobr> <font style="font-size:5px">i</font> −˜x <font style="font-size:5px">t−1</font> ) ( ˜Σ <font style="font-size:5px">t−1</font> +Λ) <font style="font-size:5px">−1</font> (x <font style="font-size:5px">i</font> −˜x <font style="font-size:5px">t−1</font> )</span> <nobr>（x</nobr> <font style="font-size:5px">i</font> -x <font style="font-size:5px">t-1</font> ）（〜Σt <font style="font-size:5px">-1</font> +Λ） <font style="font-size:5px">-1</font> （x <font style="font-size:5px">i</font> -x <font style="font-size:5px">t-1</font> ）</span> </div><div style="position:absolute;top:4605;left:398"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>)</nobr></span> <nobr>）</nobr></span> </div><div style="position:absolute;top:4615;left:232"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>√</nobr></span> <nobr>√</nobr></span> </div><div style="position:absolute;top:4629;left:244"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>|</nobr></span> <nobr>|</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>˜Σ</nobr> <font 
style="font-size:5px">t−1</font> Λ <font style="font-size:5px">−1</font> +I|</span> <nobr>〜Σt</nobr> <font style="font-size:5px">-1</font> <font style="font-size:5px">^ -1</font> + I |</span> </div><div style="position:absolute;top:4617;left:409"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>.</nobr></span> <nobr>。</nobr></span> </div><div style="position:absolute;top:4650;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>2) Variance: Using the law of total variance, we obtain</nobr></span> <nobr>2）方差：使用总方差的规律，我们得到</nobr></span> </div><div style="position:absolute;top:4677;left:81"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>σ</nobr> <font style="font-size:8px">2</font></span> <nobr><font style="font-size:8px">σ2</font></nobr></span> </div><div style="position:absolute;top:4684;left:90"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∆</nobr> <font style="font-size:12px">= E</font> ˜x <font style="font-size:5px">t−1</font> <font style="font-size:12px">[m</font> f <font style="font-size:12px">(˜x</font> t−1 <font style="font-size:12px">)</font> 2 <font style="font-size:12px">] + E</font> ˜x <font style="font-size:5px">t−1</font> <font style="font-size:12px">[σ</font> 2</span> <nobr>Δ</nobr> <font style="font-size:12px">=</font> E〜x <font style="font-size:5px">t-1</font> <font style="font-size:12px">[m</font> f <font style="font-size:12px">（〜x</font> t-1 <font style="font-size:12px">）</font> 2 <font style="font-size:12px">] +</font> E〜x <font style="font-size:5px">t-1</font> <font style="font-size:12px">[σ2</font></span> </div><div 
style="position:absolute;top:4684;left:302"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>f</nobr> <font style="font-size:12px">(˜x</font> t−1 <font style="font-size:12px">)</font></span> <nobr>f</nobr> <font style="font-size:12px">（〜x</font> t-1 <font style="font-size:12px">）</font></span> </div><div style="position:absolute;top:4699;left:118"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>− E</nobr> <font style="font-size:8px">˜x</font> <font style="font-size:5px">t−1</font> [m <font style="font-size:8px">f</font> (˜x <font style="font-size:8px">t−1</font> )] <font style="font-size:8px">2</font></span> <nobr>-</nobr> <font style="font-size:8px">E〜x</font> <font style="font-size:5px">t-1</font> [m <font style="font-size:8px">f</font> （〜x <font style="font-size:8px">t-1</font> ）] <font style="font-size:8px">2</font></span> </div><div style="position:absolute;top:4724;left:104"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>= β Qβ + α</nobr> <font style="font-size:8px">2</font> − tr ((K + σ <font style="font-size:8px">2</font></span> <nobr><font style="font-size:8px">=βQβ+α2</font></nobr> - TR（（K <font style="font-size:8px">+σ2</font></span> </div><div style="position:absolute;top:4731;left:284"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>ε</nobr> <font style="font-size:12px">I)</font> −1 <font style="font-size:12px">Q) − µ</font> 2</span> <nobr>εI</nobr> <font style="font-size:12px">）</font> -1 <font style="font-size:12px">Q） - μ2</font></span> </div><div style="position:absolute;top:4731;left:366"> <span class="notranslate" 
onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∆</nobr> <font style="font-size:12px">+ σ</font> 2</span> <nobr>∆</nobr> <font style="font-size:12px">+ σ</font> 2</span> </div><div style="position:absolute;top:4731;left:404"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>ε</nobr> <font style="font-size:12px">, (11)</font></span> <nobr>ε</nobr> <font style="font-size:12px">，（11）</font></span> </div><div style="position:absolute;top:4752;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>where tr( · ) is the trace.</nobr></span> <nobr>其中tr（·）表示矩阵的迹。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The entries of Q ∈ R</nobr> <font style="font-size:8px">n×n</font> are</span> <nobr>Q∈R</nobr> <font style="font-size:8px">n×n</font> <nobr>的元素</nobr>为</span> </div><div style="position:absolute;top:4784;left:110"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Q</nobr> <font style="font-size:8px">ij</font> =k(x <font style="font-size:8px">i</font> , ˜µ <font style="font-size:8px">t−1</font> )k(x <font style="font-size:8px">j</font> , ˜µ <font style="font-size:8px">t−1</font> )|2˜Σ <font style="font-size:8px">t−1</font> Λ <font style="font-size:8px">−1</font> + I|</span> <nobr>Q</nobr> <font style="font-size:8px">ij</font> =k(x <font style="font-size:8px">i</font> , ˜µ <font style="font-size:8px">t−1</font> )k(x <font style="font-size:8px">j</font> , ˜µ <font style="font-size:8px">t−1</font> )|2˜Σ <font style="font-size:8px">t−1</font> Λ <font style="font-size:8px">−1</font> + I 
|</span> </div><div style="position:absolute;top:4780;left:394"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−</nobr></span> <nobr>-</nobr></span> </div><div style="position:absolute;top:4775;left:405"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>1</nobr></span> <nobr>1</nobr></span> </div><div style="position:absolute;top:4786;left:405"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>2</nobr></span> <nobr>2</nobr></span> </div><div style="position:absolute;top:4807;left:151"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>× exp (</nobr> <font style="font-size:8px">1</font></span> <nobr>×exp（</nobr> <font style="font-size:8px">1</font></span> </div><div style="position:absolute;top:4816;left:200"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>2</nobr></span> <nobr>2</nobr></span> </div><div style="position:absolute;top:4808;left:207"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>z</nobr> <font style="font-size:8px">ij</font> (2 ˜Σ <font style="font-size:8px">t−1</font> Λ <font style="font-size:8px">−1</font> + I) <font style="font-size:8px">−1</font> ˜Σ <font style="font-size:8px">t−1</font> z <font style="font-size:8px">ij</font></span> <nobr>z</nobr> <font style="font-size:8px">ij</font> (2 ˜Σ <font style="font-size:8px">t−1</font> Λ <font style="font-size:8px">−1</font> + I) <font style="font-size:8px">−1</font> ˜Σ <font 
style="font-size:8px">t−1</font> z <font style="font-size:8px">ij</font></span> </div><div style="position:absolute;top:4806;left:398"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>)</nobr></span> <nobr>）</nobr></span> </div><div style="position:absolute;top:3828;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>with ζ</nobr> <font style="font-size:8px">i</font> := (x <font style="font-size:8px">i</font> − ˜µ <font style="font-size:8px">t−1</font> ) and z <font style="font-size:8px">ij</font> := Λ <font style="font-size:8px">−1</font> (ζ <font style="font-size:8px">i</font> + ζ <font style="font-size:8px">j</font> ).</span> <nobr>其中 ζ</nobr> <font style="font-size:8px">i</font> := (x <font style="font-size:8px">i</font> − ˜µ <font style="font-size:8px">t−1</font> )，z <font style="font-size:8px">ij</font> := Λ <font style="font-size:8px">−1</font> (ζ <font style="font-size:8px">i</font> + ζ <font style="font-size:8px">j</font> )。</span> </div><div style="position:absolute;top:3846;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Note that both µ</nobr> <font style="font-size:8px">∆</font> and σ <font style="font-size:8px">2</font></span> <nobr>注意，µ</nobr> <font style="font-size:8px">∆</font> 和 σ <font style="font-size:8px">2</font></span> </div><div style="position:absolute;top:3853;left:630"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∆</nobr> <font style="font-size:12px">are functionally dependent on the</font></span> <nobr>∆</nobr> <font style="font-size:12px">在函数关系上都依赖于</font></span> </div><div 
style="position:absolute;top:3864;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>mean µ</nobr> <font style="font-size:8px">u</font> and the covariance Σ <font style="font-size:8px">u</font> of the control signal through</span> <nobr>控制信号的均值 µ</nobr> <font style="font-size:8px">u</font> 和协方差 Σ <font style="font-size:8px">u</font> ，分别通过</span> </div><div style="position:absolute;top:3881;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>˜µ</nobr> <font style="font-size:8px">t−1</font> and˜Σ <font style="font-size:8px">t−1</font> , respectively, see Eqs.</span> <nobr>˜µ</nobr> <font style="font-size:8px">t−1</font> 和 ˜Σ <font style="font-size:8px">t−1</font> 体现，参见式</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#4">(10</a> ) and (11).</span> <a href="#4">（10</a> ）和（11）。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">We can</span>我们可以</span> </div><div style="position:absolute;top:3900;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>see from Eqs.</nobr></span> <nobr>从式</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(</nobr> <a href="#4">10)</a> and (11) that the uncertainty about the</span> <nobr>（</nobr> <a href="#4">10）</a>和（11）看出，关于</span> </div><div style="position:absolute;top:3918;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span 
class="google-src-text" style="direction: ltr; text-align: left"><nobr>latent function f (according to the GP posterior) is integrated</nobr></span> <nobr>潜函数f的不确定性（依据GP后验）已被积分</nobr></span> </div><div style="position:absolute;top:3935;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>out, which explicitly accounts for model uncertainty.</nobr></span> <nobr>消去，从而显式地考虑了模型的不确定性。</nobr></span> </div><div style="position:absolute;top:3964;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>C. Controller Learning through Indirect Policy Search</nobr></span> <nobr>C.通过间接策略搜索进行控制器学习</nobr></span> </div><div style="position:absolute;top:3987;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>From Sec.</nobr></span> <nobr>由第</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#3">IV-B</a> we know how to cascade one-step pre-</span> <a href="#3">IV-B</a>节，我们知道如何级联单步预测，</span> </div><div style="position:absolute;top:4005;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>dictions to obtain Gaussian approximations to the predictive</nobr></span> <nobr>以获得高斯近似的预测</nobr></span> </div><div style="position:absolute;top:4023;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>distributions p(x</nobr> <font style="font-size:8px">1</font> ),...,p(x <font style="font-size:8px">T</font> ).</span> <nobr>分布p（x</nobr> <font 
style="font-size:8px">1</font> ），...，p（x <font style="font-size:8px">T</font> ）。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">To evaluate the expected return</span>评估预期回报</span> </div><div style="position:absolute;top:4040;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>J</nobr> <font style="font-size:8px">π</font> in Eq.</span> <nobr><font style="font-size:8px">Ĵπ</font></nobr>公式。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#3">(1</a> ), it remains to compute the expected values</span> <a href="#3">（1</a> ），它仍然是计算期望值</span> </div><div style="position:absolute;top:4074;left:531"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>E</nobr> <font style="font-size:8px">x</font> <font style="font-size:5px">t</font> [c(x <font style="font-size:8px">t</font> )] =</span> <nobr>E</nobr> <font style="font-size:8px">x</font> <font style="font-size:5px">t</font> [c（x <font style="font-size:8px">t</font> ）] =</span> </div><div style="position:absolute;top:4064;left:615"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∫</nobr></span> <nobr>∫</nobr></span> </div><div style="position:absolute;top:4074;left:633"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>c(x</nobr> <font style="font-size:8px">t</font> )N(x <font style="font-size:8px">t</font> | µ <font style="font-size:8px">t</font> , Σ <font style="font-size:8px">t</font> ) dx 
<font style="font-size:8px">t</font></span> <nobr>c(x</nobr> <font style="font-size:8px">t</font> )N(x <font style="font-size:8px">t</font> | µ <font style="font-size:8px">t</font> , Σ <font style="font-size:8px">t</font> ) dx <font style="font-size:8px">t</font></span> </div><div style="position:absolute;top:4074;left:820"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(12)</nobr></span> <nobr>（12）</nobr></span> </div><div style="position:absolute;top:4108;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>of the instantaneous cost c with respect to the predictive state</nobr></span> <nobr>即瞬时成本c关于预测状态</nobr></span> </div><div style="position:absolute;top:4126;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>distributions.</nobr></span> <nobr>分布的期望。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>We assume that the cost function c is chosen</nobr></span> <nobr>我们假设成本函数c的选取</nobr></span> </div><div style="position:absolute;top:4144;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>such that this integral can be solved analytically.</nobr></span> <nobr>使得这个积分可以解析求解。</nobr></span> </div><div style="position:absolute;top:4162;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>To apply a gradient-based policy search to find controller</nobr></span> <nobr>为了应用基于梯度的策略搜索来寻找控制器</nobr></span> </div><div 
style="position:absolute;top:4180;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>parameters ψ that minimize J</nobr> <font style="font-size:8px">π</font> , see Eq.</span> <nobr>参数ψ最小化<font style="font-size:8px">Ĵπ，</font></nobr>见方程</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">( <a href="#3">1</a> ), we first swap</span> （ <a href="#3">1</a> ），我们先交换</span> </div><div style="position:absolute;top:4198;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>the order of differentiation and summation in Eq.</nobr></span> <nobr>方程中的差分和求和的顺序。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(</nobr> <a href="#3">1</a> ).</span> <nobr>（</nobr> <a href="#3">1</a> ）。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">With</span>同</span> </div><div style="position:absolute;top:4214;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>E</nobr> <font style="font-size:8px">t</font> := E <font style="font-size:8px">x</font> <font style="font-size:5px">t</font> [c(x <font style="font-size:8px">t</font> )] we obtain</span> <nobr>E</nobr> <font style="font-size:8px">t</font> ：= E <font style="font-size:8px">x</font> <font style="font-size:5px">t</font> [c（x <font style="font-size:8px">t</font> ）]我们得到</span> </div><div style="position:absolute;top:4240;left:565"> <span class="notranslate" onmouseover="_tipon(this)" 
onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>dE</nobr> <font style="font-size:8px">t</font></span> <nobr>dE</nobr> <font style="font-size:8px">t</font></span> </div><div style="position:absolute;top:4260;left:566"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>dψ</nobr></span> <nobr>dψ</nobr></span> </div><div style="position:absolute;top:4250;left:593"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>=</nobr></span> <nobr>=</nobr></span> </div><div style="position:absolute;top:4240;left:612"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∂E</nobr> <font style="font-size:8px">t</font></span> <nobr>∂E</nobr> <font style="font-size:8px">t</font></span> </div><div style="position:absolute;top:4260;left:610"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∂µ</nobr> <font style="font-size:8px">t</font></span> <nobr>∂µ</nobr> <font style="font-size:8px">t</font></span> </div><div style="position:absolute;top:4240;left:639"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>dµ</nobr> <font style="font-size:8px">t</font></span> <nobr>dµ</nobr> <font style="font-size:8px">t</font></span> </div><div style="position:absolute;top:4260;left:640"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>dψ</nobr></span> <nobr>dψ</nobr></span> </div><div style="position:absolute;top:4250;left:668"> <span 
class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>+</nobr></span> <nobr>+</nobr></span> </div><div style="position:absolute;top:4240;left:687"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∂E</nobr> <font style="font-size:8px">t</font></span> <nobr>∂E</nobr> <font style="font-size:8px">t</font></span> </div><div style="position:absolute;top:4260;left:685"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∂Σ</nobr> <font style="font-size:8px">t</font></span> <nobr>∂Σ</nobr> <font style="font-size:8px">t</font></span> </div><div style="position:absolute;top:4240;left:715"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>dΣ</nobr> <font style="font-size:8px">t</font></span> <nobr>dΣ</nobr> <font style="font-size:8px">t</font></span> </div><div style="position:absolute;top:4260;left:717"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>dψ</nobr></span> <nobr>dψ</nobr></span> </div><div style="position:absolute;top:4250;left:745"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>.</nobr></span> <nobr>。</nobr></span> </div><div style="position:absolute;top:4250;left:820"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(13)</nobr></span> <nobr>（13）</nobr></span> </div><div style="position:absolute;top:4285;left:468"> <span class="notranslate" 
onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The total derivatives of the mean µ</nobr> <font style="font-size:8px">t</font> and the covariance Σ <font style="font-size:8px">t</font></span> <nobr>均值 µ</nobr> <font style="font-size:8px">t</font> 和协方差 Σ <font style="font-size:8px">t</font> （二者属于</span> </div><div style="position:absolute;top:4302;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>of p(x</nobr> <font style="font-size:8px">t</font> ) with respect to the policy parameters ψ can be</span> <nobr>p（x</nobr> <font style="font-size:8px">t</font> ））关于策略参数ψ的全导数可以</span> </div><div style="position:absolute;top:4320;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>computed analytically by repeated application of the chain-rule</nobr></span> <nobr>通过反复应用链式法则解析地计算，</nobr></span> </div><div style="position:absolute;top:4338;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>to Eqs.</nobr></span> <nobr>即应用于式</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(</nobr> <a href="#4">7</a> ), (8), (9), (10), (11).</span> <nobr>（</nobr> <a href="#4">7</a> ），（8），（9），（10），（11）。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">This also involves computing</span>这也涉及到计算</span> </div><div style="position:absolute;top:4356;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: 
ltr; text-align: left"><nobr>the partial derivatives of ∂µ</nobr> <font style="font-size:8px">u</font> /∂ψ and ∂Σ <font style="font-size:8px">u</font> /∂ψ.</span> <nobr>偏导数∂μ</nobr> <font style="font-size:8px">u</font> /∂ψ和∂Σ <font style="font-size:8px">u</font> /∂ψ。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">We omit</span>我们省略</span> </div><div style="position:absolute;top:4374;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>further lengthy details here, but point out that these derivatives</nobr></span> <nobr>此处进一步的冗长细节，但需指出这些导数</nobr></span> </div><div style="position:absolute;top:4392;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>are computed analytically [</nobr> <a href="#8">6</a> , 7].</span> <nobr>是解析计算的[</nobr> <a href="#8">6</a> ，7]。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">This allows for standard</span>这使得可以使用标准的</span> </div><div style="position:absolute;top:4410;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>gradient-based non-convex optimization methods, eg, CG or</nobr></span> <nobr>基于梯度的非凸优化方法，例如CG或</nobr></span> </div><div style="position:absolute;top:4428;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>L-BFGS, which return an optimized parameter vector ψ</nobr></span> <nobr>L-BFGS，它返回一个优化的参数矢量ψ</nobr></span> </div><div 
style="position:absolute;top:4424;left:815"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∗</nobr></span> <nobr>∗</nobr></span> </div><div style="position:absolute;top:4428;left:822"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>.</nobr></span> <nobr>。</nobr></span> </div><div style="position:absolute;top:4457;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>D. Planning with State-Space Constraints</nobr></span> <nobr>D. 带状态空间约束的规划</nobr></span> </div><div style="position:absolute;top:4479;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In a classical RL setup, it is assumed that the learner is not</nobr></span> <nobr>在经典的RL设置中，通常假定学习者并不</nobr></span> </div><div style="position:absolute;top:4497;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>aware of any constraints in the state space, but has to discover</nobr></span> <nobr>知道状态空间中的任何约束，而必须去发现</nobr></span> </div><div style="position:absolute;top:4515;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>walls etc. 
by running into them and gaining a high penalty.</nobr></span> <nobr>进入他们的墙壁等，并获得高罚款。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In</nobr></span> <nobr>在</nobr></span> </div><div style="position:absolute;top:4533;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>a robotic setup, this general, but not necessary, assumption is</nobr></span> <nobr>机器人设置，这个一般的，但不是必要的假设是</nobr></span> </div><div style="position:absolute;top:4551;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>less desirable because the robot can be damaged.</nobr></span> <nobr>不太理想，因为机器人可能会损坏。</nobr></span> </div><div style="position:absolute;top:4569;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>If constraints (eg, obstacles) in the state space are known</nobr></span> <nobr>如果状态空间中的约束（例如，障碍物）是已知的</nobr></span> </div><div style="position:absolute;top:4587;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>a priori, we would like to incorporate this prior knowledge</nobr></span> <nobr>先验，我们想要纳入这个先前的知识</nobr></span> </div><div style="position:absolute;top:4605;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>directly into planning and policy learning.</nobr></span> <nobr>直接进入规划和政策学习。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: 
ltr; text-align: left"><nobr>We propose to</nobr></span> <nobr>我们提出</nobr></span> </div><div style="position:absolute;top:4623;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>define obstacles as “undesirable” regions, ie, regions the robot</nobr></span> <nobr>将障碍物定义为“不期望”的区域，即机器人</nobr></span> </div><div style="position:absolute;top:4641;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>is supposed to avoid.</nobr></span> <nobr>应当避开的区域。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>We define “undesirability” as a penalty</nobr></span> <nobr>我们将“不期望性”定义为</nobr></span> </div><div style="position:absolute;top:4659;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>in the instantaneous cost function c, which we re-define as</nobr></span> <nobr>瞬时成本函数c中的惩罚项，并将c重新定义为</nobr></span> </div><div style="position:absolute;top:4690;left:524"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>c(x) = −</nobr></span> <nobr>c（x）= −</nobr></span> </div><div style="position:absolute;top:4686;left:585"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∑</nobr> <font style="font-size:8px">K</font></span> <nobr>∑</nobr> <font style="font-size:8px">K</font></span> </div><div style="position:absolute;top:4702;left:607"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" 
style="direction: ltr; text-align: left"><nobr>k=1</nobr></span> <nobr>k=1</nobr></span> </div><div style="position:absolute;top:4690;left:632"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>c</nobr> <font style="font-size:8px">+</font></span> <nobr>c</nobr> <font style="font-size:8px">+</font></span> </div><div style="position:absolute;top:4698;left:638"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>k</nobr> <font style="font-size:12px">(x) +</font></span> <nobr>k</nobr> <font style="font-size:12px">（x）+</font></span> </div><div style="position:absolute;top:4686;left:687"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∑</nobr> <font style="font-size:8px">J</font></span> <nobr>∑</nobr> <font style="font-size:8px">J</font></span> </div><div style="position:absolute;top:4702;left:709"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>j=1</nobr></span> <nobr>j=1</nobr></span> </div><div style="position:absolute;top:4690;left:733"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>ι</nobr> <font style="font-size:8px">j</font> c <font style="font-size:8px">−</font></span> <nobr>ι</nobr> <font style="font-size:8px">j</font> c <font style="font-size:8px">−</font></span> </div><div style="position:absolute;top:4697;left:751"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>j</nobr> <font style="font-size:12px">(x) ,</font></span> <nobr>j</nobr> 
<font style="font-size:12px">（x），</font></span> </div><div style="position:absolute;top:4690;left:820"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(14)</nobr></span> <nobr>（14）</nobr></span> </div><div style="position:absolute;top:4724;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>where c</nobr> <font style="font-size:8px">+</font></span> <nobr>在哪里c</nobr> <font style="font-size:8px">+</font></span> </div><div style="position:absolute;top:4732;left:515"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>k</nobr> <font style="font-size:12px">are desirable states (eg, the target state) and c</font></span> <nobr>k</nobr> <font style="font-size:12px">是期望的状态（例如，目标状态）和c</font></span> </div><div style="position:absolute;top:4720;left:812"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−</nobr></span> <nobr>-</nobr></span> </div><div style="position:absolute;top:4731;left:812"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>j</nobr> <font style="font-size:12px">are</font></span> <nobr>j</nobr> <font style="font-size:12px">是</font></span> </div><div style="position:absolute;top:4742;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>undesirable states (eg, obstacles), weighted by ι</nobr> <font style="font-size:8px">j</font> ≥ 0. 
Bigger</span> <nobr>不期望的状态（例如，障碍物），由ι</nobr> <font style="font-size:8px">j</font> ≥ 0加权。更大的</span> </div><div style="position:absolute;top:4760;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>values for ι</nobr> <font style="font-size:8px">j</font> make the policy more averse to a particular</span> <nobr>ι</nobr> <font style="font-size:8px">j</font> 取值使策略更加规避某个特定的</span> </div><div style="position:absolute;top:4778;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>undesirable state.</nobr></span> <nobr>不期望状态。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In this paper, we always set ι</nobr> <font style="font-size:8px">j</font> = 1. 
For</span> <nobr>在本文中，我们始终设置ι</nobr> <font style="font-size:8px">j</font> = 1。对于</span> </div><div style="position:absolute;top:4796;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>c</nobr> <font style="font-size:8px">+</font></span> <nobr>c</nobr> <font style="font-size:8px">+</font></span> </div><div style="position:absolute;top:4804;left:474"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>k</nobr> <font style="font-size:12px">and c</font></span> <nobr>k</nobr> <font style="font-size:12px">和c</font></span> </div><div style="position:absolute;top:4792;left:524"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−</nobr></span> <nobr>−</nobr></span> </div><div style="position:absolute;top:4803;left:524"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>j</nobr> <font style="font-size:12px">, we choose squared-exponentials, which trade off</font></span> <nobr>j</nobr> <font style="font-size:12px">，我们选择平方指数函数，用于权衡</font></span> </div><div style="position:absolute;top:4814;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>exploration with exploitation when averaging according to the</nobr></span> <nobr>探索与利用，取平均时依据</nobr></span> </div><div style="position:absolute;top:4134;left:350"><a href="#3" style="background-color:#0000ff;padding:17px 11px;"></a></div><div style="position:absolute;top:3876;left:769"><a href="#4" style="background-color:#0000ff;padding:22px 18px;"></a></div><div style="position:absolute;top:3898;left:628"><a href="#4" 
style="background-color:#0000ff;padding:16px 18px;"></a></div><div style="position:absolute;top:4337;left:547"><a href="#4" style="background-color:#0000ff;padding:16px 10px;"></a></div><div style="position:absolute;top:4337;left:573"><a href="#4" style="background-color:#0000ff;padding:16px 11px;"></a></div><div style="position:absolute;top:4337;left:600"><a href="#4" style="background-color:#0000ff;padding:16px 18px;"></a></div><div style="position:absolute;top:4337;left:634"><a href="#4" style="background-color:#0000ff;padding:16px 18px;"></a></div><div style="position:absolute;top:4391;left:662"><a href="#8" style="background-color:#0000ff;padding:13px 10px;"></a></div><div style="position:absolute;top:4927;left:0"><hr><table border="0" width="100%"><tbody><tr><td bgcolor="eeeeee" align="right"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><font face="arial,sans-serif"><a name="5"><b>Page 5</b></a></font></span> <font face="arial,sans-serif"><a name="5"><b>第5页</b></a></font></span> </td></tr></tbody></table></div><div style="position:absolute;top:5135;left:150"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−5</nobr></span> <nobr>-5</nobr></span> </div><div style="position:absolute;top:5135;left:179"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−4</nobr></span> <nobr>-4</nobr></span> </div><div style="position:absolute;top:5135;left:208"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−3</nobr></span> <nobr>-3</nobr></span> </div><div style="position:absolute;top:5135;left:236"> <span class="notranslate" onmouseover="_tipon(this)" 
onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−2</nobr></span> <nobr>-2</nobr></span> </div><div style="position:absolute;top:5135;left:265"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−1</nobr></span> <nobr>-1</nobr></span> </div><div style="position:absolute;top:5135;left:299"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0</nobr></span> <nobr>0</nobr></span> </div><div style="position:absolute;top:5135;left:327"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>1</nobr></span> <nobr>1</nobr></span> </div><div style="position:absolute;top:5135;left:356"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>2</nobr></span> <nobr>2</nobr></span> </div><div style="position:absolute;top:5135;left:385"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>3</nobr></span> <nobr>3</nobr></span> </div><div style="position:absolute;top:5100;left:140"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−0.5</nobr></span> <nobr>-0.5</nobr></span> </div><div style="position:absolute;top:5070;left:152"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0</nobr></span> <nobr>0</nobr></span> </div><div style="position:absolute;top:5041;left:145"> <span class="notranslate" onmouseover="_tipon(this)" 
onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0.5</nobr></span> <nobr>0.5</nobr></span> </div><div style="position:absolute;top:5143;left:260"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>state x</nobr></span> <nobr>状态x</nobr></span> </div><div style="position:absolute;top:5075;left:136"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>cost</nobr></span> <nobr>成本</nobr></span> </div><div style="position:absolute;top:5082;left:172"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>c1+</nobr></span> <nobr>C1 +</nobr></span> </div><div style="position:absolute;top:5095;left:172"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>c1−</nobr></span> <nobr>C1-</nobr></span> </div><div style="position:absolute;top:5109;left:172"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>c2−</nobr></span> <nobr>C2-</nobr></span> </div><div style="position:absolute;top:5124;left:172"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>c</nobr></span> <nobr>C</nobr></span> </div><div style="position:absolute;top:5170;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Fig. 3. 
Cost function that takes constraints (eg, obstacles) into account</nobr></span> <nobr>图3.考虑约束（例如障碍）的成本函数</nobr></span> </div><div style="position:absolute;top:5184;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>by making them “undesirable”.</nobr></span> <nobr>通过使他们“不受欢迎”。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The non-solid curves are the individual</nobr></span> <nobr>非实曲线是个体</nobr></span> </div><div style="position:absolute;top:5197;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>components c</nobr> <font style="font-size:6px">−</font></span> <nobr>组件c</nobr> <font style="font-size:6px">-</font></span> </div><div style="position:absolute;top:5203;left:141"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>j</nobr> <font style="font-size:9px">and c</font> +</span> <nobr>j</nobr> <font style="font-size:9px">和c</font> +</span> </div><div style="position:absolute;top:5203;left:181"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>k</nobr> <font style="font-size:9px">, see Eq.</font></span> <nobr>k</nobr> <font style="font-size:9px">，参见方程</font></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><font style="font-size:9px"><a href="#4">(14)</a> , the solid curve is their sum c.</font></span> <font style="font-size:9px"><a href="#4">（14）</a> ，实曲线是它们的和。</font></span> </div><div 
style="position:absolute;top:5250;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>state distribution</nobr> <a href="#8">[</a> 6].</span> <nobr>状态分布</nobr> <a href="#8">[</a> 6]。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">The squared exponentials are unnormal-</span>平方的指数是不正常的 - </span> </div><div style="position:absolute;top:5267;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>ized with potentially different widths Σ</nobr> <font style="font-size:8px">+</font></span> <nobr>可能具有不同的宽度Σ</nobr> <font style="font-size:8px">+</font></span> </div><div style="position:absolute;top:5275;left:320"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>k</nobr> <font style="font-size:12px">.</font></span> <nobr>k</nobr> <font style="font-size:12px">。</font></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><font style="font-size:12px">The widths of the</font></span> <font style="font-size:12px">的宽度</font></span> </div><div style="position:absolute;top:5285;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>individual constraints c</nobr></span> <nobr>个人约束c</nobr></span> </div><div style="position:absolute;top:5281;left:213"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−</nobr></span> <nobr>-</nobr></span> </div><div 
style="position:absolute;top:5292;left:213"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>j</nobr> <font style="font-size:12px">define how “soft” the constraints are.</font></span> <nobr>j</nobr> <font style="font-size:12px">决定了约束有多“软”。</font></span> </div><div style="position:absolute;top:5303;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Hard constraints would be described by very peaked squared</nobr></span> <nobr>硬约束可以由非常尖锐的平方</nobr></span> </div><div style="position:absolute;top:5321;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>exponentials c</nobr></span> <nobr>指数函数c</nobr></span> </div><div style="position:absolute;top:5317;left:162"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−</nobr></span> <nobr>−</nobr></span> </div><div style="position:absolute;top:5328;left:162"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>j</nobr> <font style="font-size:12px">with ι</font> j <font style="font-size:12px">→ ∞.</font></span> <nobr>j</nobr> <font style="font-size:12px">（其中ι</font> j <font style="font-size:12px">→ ∞）来描述。</font></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><font style="font-size:12px">The idea is related to [ <a href="#8">24]</a> ,</font></span> <font style="font-size:12px">这个想法与[ <a href="#8">24]</a> 有关，</font></span> </div><div style="position:absolute;top:5339;left:73"> <span class="notranslate" 
onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>where planning is performed with fully known dynamics and</nobr></span> <nobr>在那里用完全已知的动态进行计划</nobr></span> </div><div style="position:absolute;top:5357;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>a piecewise linear controller.</nobr></span> <nobr>分段线性控制器。</nobr></span> </div><div style="position:absolute;top:5375;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Fig.</nobr> <a href="#5">3</a> illustrates Eq.</span> <nobr>图</nobr> <a href="#5">3</a>说明了等式</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">(14) with two penalties c</span> （14）两罚二c</span> </div><div style="position:absolute;top:5371;left:384"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−</nobr></span> <nobr>-</nobr></span> </div><div style="position:absolute;top:5382;left:384"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>j</nobr> <font style="font-size:12px">and one</font></span> <nobr>j</nobr> <font style="font-size:12px">和一个</font></span> </div><div style="position:absolute;top:5394;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>reward c</nobr> <font style="font-size:8px">+</font></span> <nobr>奖励c</nobr> <font style="font-size:8px">+</font></span> </div><div style="position:absolute;top:5402;left:125"> <span 
class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>k</nobr> <font style="font-size:12px">.</font></span> <nobr>k</nobr> <font style="font-size:12px">。</font></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><font style="font-size:12px">The figure shows that if an undesirable state and a</font></span> <font style="font-size:12px">该图显示，如果一个不良状态和一个</font></span> </div><div style="position:absolute;top:5412;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>desirable state are close, the total cost c somewhat trades off</nobr></span> <nobr>理想状态接近，总成本c有所折让</nobr></span> </div><div style="position:absolute;top:5430;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>between both objectives.</nobr></span> <nobr>在两个目标之间。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Furthermore, the optimal state x</nobr> <font style="font-size:8px">∗</font> ∈</span> <nobr>此外，最优状态x</nobr> <font style="font-size:8px">*</font> ∈</span> </div><div style="position:absolute;top:5448;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>arg min</nobr> <font style="font-size:8px">x</font> c(x) no longer corresponds to x <font style="font-size:8px">+</font></span> <nobr>arg min</nobr> <font style="font-size:8px">x</font> c（x）不再对应于x <font style="font-size:8px">+</font></span> </div><div style="position:absolute;top:5454;left:323"> <span class="notranslate" 
onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∗</nobr> <font style="font-size:12px">∈ arg min</font> x <font style="font-size:12px">c</font> + <font style="font-size:12px">(x):</font></span> <nobr>*</nobr> <font style="font-size:12px">∈arg min</font> x <font style="font-size:12px">c</font> + <font style="font-size:12px">（x）：</font></span> </div><div style="position:absolute;top:5466;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Moving a little bit away from the target state (away from the</nobr></span> <nobr>从目标状态移开一点点（远离目标状态）</nobr></span> </div><div style="position:absolute;top:5484;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>undesirable state) is optimal.</nobr></span> <nobr>不良状态）是最佳的。</nobr></span> </div><div style="position:absolute;top:5502;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The expectations of the cost in Eq.</nobr></span> <nobr>方程中的成本预期</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(</nobr> <a href="#4">14)</a> and the derivatives</span> <nobr>（</nobr> <a href="#4">14）</a>和衍生物</span> </div><div style="position:absolute;top:5520;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>with respect to the mean µ</nobr> <font style="font-size:8px">t</font> and the covariance Σ <font style="font-size:8px">t</font> of the state</span> <nobr>相对于平均值<font style="font-size:8px">μt</font></nobr>和状态的协方差Σ 
<font style="font-size:8px">t</font></span> </div><div style="position:absolute;top:5538;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>distribution p(x</nobr> <font style="font-size:8px">t</font> ) can be computed for each individual c <font style="font-size:8px">+</font></span>可以为每个个体c <font style="font-size:8px">+</font>计算<nobr>分布p（x</nobr> <font style="font-size:8px">t</font> ）</span> </div><div style="position:absolute;top:5545;left:414"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>k</nobr> <font style="font-size:12px">and</font></span> <nobr>k</nobr> <font style="font-size:12px">和</font></span> </div><div style="position:absolute;top:5555;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>c</nobr> <font style="font-size:8px">−</font></span> <nobr>c</nobr> <font style="font-size:8px">-</font></span> </div><div style="position:absolute;top:5563;left:80"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>j</nobr> <font style="font-size:12px">and summed up.</font></span> <nobr>j</nobr> <font style="font-size:12px">并求和。</font></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><font style="font-size:12px">Then, we apply the chain-rule according</font></span> <font style="font-size:12px">然后，我们应用链式法则</font></span> </div><div style="position:absolute;top:5574;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>to 
Eq.</nobr></span> <nobr>到Eq。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(</nobr> <a href="#4">13)</a> for the gradient-based policy search.</span> <nobr>（</nobr> <a href="#4">13）</a>用于基于梯度的策略搜索。</span> </div><div style="position:absolute;top:5592;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Phrasing constraints in terms of undesirability in the cost</nobr></span> <nobr>在成本不合要求方面的措词限制</nobr></span> </div><div style="position:absolute;top:5610;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>function in Eq.</nobr></span> <nobr>功能在Eq。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(</nobr> <a href="#4">14)</a> still allows for fully probabilistic long-</span> <nobr>（</nobr> <a href="#4">14）</a>仍然允许完全概率的长期</span> </div><div style="position:absolute;top:5627;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>term planning and for a guidance of the robot through the state</nobr></span> <nobr>长期规划和通过国家对机器人的指导</nobr></span> </div><div style="position:absolute;top:5645;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>space without “experiencing” obstacles by running into them.</nobr></span> <nobr>遇到障碍而没有遇到障碍。</nobr></span> </div><div style="position:absolute;top:5663;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" 
style="direction: ltr; text-align: left"><nobr>Collisions within a Bayesian inference framework can be</nobr></span> <nobr>贝叶斯推断框架内的碰撞可以是</nobr></span> </div><div style="position:absolute;top:5681;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>discouraged, but not strictly excluded in expectation.</nobr></span> <nobr>气馁，但并不严格排除期望。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>This</nobr></span> <nobr>这个</nobr></span> </div><div style="position:absolute;top:5699;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>does not mean that averaging out uncertainties is wrong—</nobr></span> <nobr>并不意味着平均不确定性是错误的 - </nobr></span> </div><div style="position:absolute;top:5717;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>it rather tells us that it is not expected to violate constraints</nobr></span> <nobr>而是告诉我们，这不会违反约束条件</nobr></span> </div><div style="position:absolute;top:5735;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>with a certain confidence.</nobr></span> <nobr>有一定的信心。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>A faithful description of predictive</nobr></span> <nobr>忠实的预测性描述</nobr></span> </div><div style="position:absolute;top:5753;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; 
text-align: left"><nobr>uncertainty is often more worth than claiming full confidence</nobr></span> <nobr>不确定性往往比声称充满信心更有价值</nobr></span> </div><div style="position:absolute;top:5771;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>and occasionally violating constraints unexpectedly.</nobr></span> <nobr>偶然会违反约束条件。</nobr></span> </div><div style="position:absolute;top:5800;left:159"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>V. E</nobr> <font style="font-size:9px">XPERIMENTAL</font> V <font style="font-size:9px">ALIDATION</font></span> <nobr>V. E</nobr> <font style="font-size:9px">XPERIMENTAL</font> V <font style="font-size:9px">ALIDATION</font></span> </div><div style="position:absolute;top:5823;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In the following, we analyze</nobr> <font style="font-size:9px">PILCO</font> ’s performance on the</span> <nobr>下面我们分析一下</nobr> <font style="font-size:9px">PILCO</font>的表现</span> </div><div style="position:absolute;top:5841;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>task of learning to stack a tower of six foam blocks B1–B6</nobr></span> <nobr>学习堆叠六个泡沫块B1-B6的塔的任务</nobr></span> </div><div style="position:absolute;top:5859;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(bottom to top), see Fig</nobr> <a href="#1">.</a></span> <nobr>（从下到上），见图</nobr> <a href="#1">。</a></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span 
class="google-src-text" style="direction: ltr; text-align: left">1. The tower's bottom block B1 was</span>塔的底部B1是</span> </div><div style="position:absolute;top:5877;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>given.</nobr></span> <nobr>给出。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>To apply</nobr> <font style="font-size:9px">PILCO</font> , we need to specify the initial state</span> <nobr>要应用</nobr> <font style="font-size:9px">PILCO</font> ，我们需要指定初始状态</span> </div><div style="position:absolute;top:5894;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>distribution, the target state, the cost function, the controller</nobr></span> <nobr>分配，目标状态，成本函数，控制器</nobr></span> </div><div style="position:absolute;top:5912;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>parametrization, and optionally obstacles.</nobr></span> <nobr>参数化和可选的障碍物。</nobr></span> </div><div style="position:absolute;top:5930;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>As an initial state distribution, we chose p(x</nobr> <font style="font-size:8px">0</font> ) =</span> <nobr>作为初始状态分布，我们选择p（x</nobr> <font style="font-size:8px">0</font> ）=</span> </div><div style="position:absolute;top:5947;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>N(x</nobr> <font style="font-size:8px">0</font> | µ <font 
style="font-size:8px">0</font> , Σ <font style="font-size:8px">0</font> ) with µ <font style="font-size:8px">0</font> being a single (noisy) measurement</span> <nobr>N（X</nobr> <font style="font-size:8px">0</font> <font style="font-size:8px"><font style="font-size:8px">|μ0，Σ0）</font></font>与<font style="font-size:8px">μ0</font>是单（嘈杂）测量</span> </div><div style="position:absolute;top:5966;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>of the initial block location using the tracking method from</nobr></span> <nobr>的初始块位置使用跟踪方法从</nobr></span> </div><div style="position:absolute;top:5984;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Sec.</nobr></span> <nobr>秒。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#2">III-B</a> .</span> <a href="#2">III-B</a> 。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">The initial covariance Σ <font style="font-size:8px">0</font> was diagonal with the</span>初始协方差<font style="font-size:8px">Σ0</font>是对角与所述</span> </div><div style="position:absolute;top:6002;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>95%-confidence bounds being the edge length b of the block.</nobr></span> <nobr>95％置信区间是块的边长b。</nobr></span> </div><div style="position:absolute;top:5018;left:709"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>initial state</nobr></span> <nobr>初始状态</nobr></span> 
</div><div style="position:absolute;top:5105;left:648"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>target</nobr></span> <nobr>目标</nobr></span> </div><div style="position:absolute;top:5026;left:552"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>camera</nobr></span> <nobr>相机</nobr></span> </div><div style="position:absolute;top:5200;left:480"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Fig. 4. Learning setup 1: The initial position is above the tower's top.</nobr></span> <nobr>图4.学习设置1：初始位置在塔顶上方。</nobr></span> </div><div style="position:absolute;top:5248;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The target state was set to a single noisy measurement using</nobr></span> <nobr>目标状态被设置为单次噪声测量</nobr></span> </div><div style="position:absolute;top:5266;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>the tracking method from Sec.</nobr></span> <nobr>来自Sec。的跟踪方法。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#2">III-B.</a></span> <a href="#2">III-B。</a></span> </div><div style="position:absolute;top:5286;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The first term of the immediate cost in Eq.</nobr></span> <nobr>方程式中直接成本的第一项</nobr></span> <span class="notranslate" 
onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(</nobr> <a href="#4">14)</a> that de-</span> <nobr>（</nobr> <a href="#4">14）</a></span> </div><div style="position:absolute;top:5304;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>scribes favorable states was set to −</nobr> <font style="font-size:8px">1</font></span> <nobr>文士有利的国家被设置为</nobr> <font style="font-size:8px">-1</font></span> </div><div style="position:absolute;top:5312;left:696"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>4</nobr></span> <nobr>4</nobr></span> </div><div style="position:absolute;top:5302;left:707"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>∑</nobr> <font style="font-size:8px">4</font></span> <nobr><font style="font-size:8px">Σ4</font></nobr></span> </div><div style="position:absolute;top:5311;left:722"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>k=1</nobr> <font style="font-size:12px">exp(−</font> 1</span> <nobr>k = 1</nobr> <font style="font-size:12px">exp（ -</font> 1</span> </div><div style="position:absolute;top:5312;left:789"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>2</nobr></span> <nobr>2</nobr></span> </div><div style="position:absolute;top:5303;left:797"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>d</nobr> <font style="font-size:8px">2</font> /σ 
<font style="font-size:8px">2</font></span> <nobr>d</nobr> <font style="font-size:8px">2</font> <font style="font-size:8px">/σ2</font></span> </div><div style="position:absolute;top:5311;left:828"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>k</nobr> <font style="font-size:12px">),</font></span> <nobr>k</nobr> <font style="font-size:12px">），</font></span> </div><div style="position:absolute;top:5321;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>where d := x</nobr> <font style="font-size:8px">t</font> − x <font style="font-size:8px">target</font></span> <nobr>其中d：= x</nobr> <font style="font-size:8px">t</font> -  x <font style="font-size:8px">目标</font></span> </div><div style="position:absolute;top:5321;left:644"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>and σ</nobr> <font style="font-size:8px">k</font> = { <font style="font-size:8px">1</font></span> <nobr>和<font style="font-size:8px">σK</font></nobr> = <font style="font-size:8px">{1</font></span> </div><div style="position:absolute;top:5330;left:727"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>4</nobr></span> <nobr>4</nobr></span> </div><div style="position:absolute;top:5321;left:735"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>b,</nobr> <font style="font-size:8px">1</font></span> <nobr>b，</nobr> <font style="font-size:8px">1</font></span> </div><div style="position:absolute;top:5330;left:750"> <span class="notranslate" onmouseover="_tipon(this)" 
onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>2</nobr> <font style="font-size:12px">b, b, 2b}, k =</font></span> <nobr>2</nobr> <font style="font-size:12px">b，b，2b}，k =</font></span> </div><div style="position:absolute;top:5339;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>1,..., 4, and b being the edge length of the foam block.</nobr></span> <nobr>1，...，4，b是泡沫块的边缘长度。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The</nobr></span> <nobr>该</nobr></span> </div><div style="position:absolute;top:5357;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>scale mixture of squared exponentials makes the choice of a</nobr></span> <nobr>平方指数的尺度混合使得</nobr></span> </div><div style="position:absolute;top:5375;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>single σ</nobr> <font style="font-size:8px">k</font> less important and yields non-zero policy gradients</span> <nobr>单个σ<font style="font-size:8px">k</font></nobr>的选择不那么重要，并产生非零的策略梯度</span> </div><div style="position:absolute;top:5393;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>dJ/ dψ even relatively far away from the target x</nobr> <font style="font-size:8px">target</font> .</span> <nobr>dJ/dψ，即使离目标x</nobr> <font style="font-size:8px">target</font> <nobr>相对较远</nobr> 。</span> </div><div style="position:absolute;top:5412;left:483"> <span class="notranslate" onmouseover="_tipon(this)" 
onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>We used linear controllers, ie, π(x) = u = Ax + b, and</nobr></span> <nobr>我们使用线性控制器，即π（x）= u = Ax + b和</nobr></span> </div><div style="position:absolute;top:5430;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>initialized the controller parameters ψ = {A, b} ∈ R</nobr> <font style="font-size:8px">16</font> to 0.</span> <nobr>初始化控制器参数ψ= {A，b}∈R</nobr> <font style="font-size:8px">16</font>为0。</span> </div><div style="position:absolute;top:5450;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The Euclidean distance d of the end effector from the</nobr></span> <nobr>从末端执行器的欧几里得距离d</nobr></span> </div><div style="position:absolute;top:5467;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>camera was approximately 0.7m–2.0m, depending on the</nobr></span> <nobr>相机约0.7米-2.0米，这取决于</nobr></span> </div><div style="position:absolute;top:5485;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>robot configuration.</nobr></span> <nobr>机器人配置。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Both the control sampling frequency and</nobr></span> <nobr>控制采样频率和</nobr></span> </div><div style="position:absolute;top:5503;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>the time discretization ∆</nobr> <font 
style="font-size:8px">t</font> were set to rather slow 2 Hz;</span> <nobr>时间离散</nobr> <font style="font-size:8px">Δt</font>被设定为相当慢的2Hz;</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">the</span>该</span> </div><div style="position:absolute;top:5521;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>planning/episode length T was 5s.</nobr></span> <nobr>计划/插曲长度T是5s。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>After 5s, the robot opened</nobr></span> <nobr>5秒后，机器人开了</nobr></span> </div><div style="position:absolute;top:5539;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>the gripper and freed the block.</nobr></span> <nobr>抓手和释放块。</nobr></span> </div><div style="position:absolute;top:5558;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The motion of the block grasped by the end effector was</nobr></span> <nobr>由末端执行器抓住的块的运动是</nobr></span> </div><div style="position:absolute;top:5576;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>modeled by GPs as described in Sec.</nobr></span> <nobr>按照第二部分描述的GP模拟。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#3">IV-A.</a></span> <a href="#3">IV-A。</a></span> <span class="notranslate" onmouseover="_tipon(this)" 
onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">The inferred</span>推断</span> </div><div style="position:absolute;top:5594;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>system noise standard deviations, which comprise stochasticity</nobr></span> <nobr>系统噪声标准偏差，包括随机性</nobr></span> </div><div style="position:absolute;top:5612;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>of the robotic arm, synchronization errors, delays, and image</nobr></span> <nobr>机器人手臂，同步错误，延迟和图像</nobr></span> </div><div style="position:absolute;top:5630;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>processing errors, ranged from 0.5 cm to 2.0 cm.</nobr></span> <nobr>加工误差范围从0.5厘米到2.0厘米。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>These learned</nobr></span> <nobr>这些学到了</nobr></span> </div><div style="position:absolute;top:5648;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>noise levels were in the right ballpark: They were slightly</nobr></span> <nobr>噪音水平是在右边的球场：他们是轻微的</nobr></span> </div><div style="position:absolute;top:5666;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>larger than the expected camera noise [</nobr> <a href="#8">2]</a> .</span> <nobr>大于预期的相机噪声[</nobr> <a href="#8">2]</a> 。</span> <span class="notranslate" onmouseover="_tipon(this)" 
onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">The signal-to-noise</span>信号噪声</span> </div><div style="position:absolute;top:5684;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>ratio in our experiments ranged from 2 to 6.</nobr></span> <nobr>比例在我们的实验范围从2到6。</nobr></span> </div><div style="position:absolute;top:5703;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In Sec.</nobr></span> <nobr>在第二部分</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#5">VA,</a> we evaluate the applicability of the <font style="font-size:9px">PILCO</font></span> <a href="#5">VA，</a>我们评估<font style="font-size:9px">PILCO</font>的适用性</span> </div><div style="position:absolute;top:5721;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>framework to autonomous block stacking when starting from</nobr></span> <nobr>框架自动块堆叠时，从开始</nobr></span> </div><div style="position:absolute;top:5739;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>a fully upright robot configuration.</nobr></span> <nobr>一个完全直立的机器人配置。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>For each block, an inde-</nobr></span> <nobr>对于每个块，</nobr></span> </div><div style="position:absolute;top:5757;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span 
class="google-src-text" style="direction: ltr; text-align: left"><nobr>pendent controller is learned.</nobr></span> <nobr>下属管制员学习。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In Sec.</nobr></span> <nobr>在第二部分</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#6">VB,</a> we analyze <font style="font-size:9px">PILCO</font> ’s</span> <a href="#6">VB，</a>我们分析<font style="font-size:9px">PILCO</font>的</span> </div><div style="position:absolute;top:5775;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>ability to exploit useful prior information by transferring</nobr></span> <nobr>通过转移利用有用的先验信息的能力</nobr></span> </div><div style="position:absolute;top:5793;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>knowledge from one learned controller to another one.</nobr></span> <nobr>知识从一个学习控制器到另一个。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In</nobr></span> <nobr>在</nobr></span> </div><div style="position:absolute;top:5811;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Sec.</nobr></span> <nobr>秒。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#6">VC,</a> the robot learned building a tower, where the initial</span> <a href="#6">VC，</a>机器人学会了建造塔楼，最初的地方</span> 
</div><div style="position:absolute;top:5829;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>position was below the topmost block.</nobr></span> <nobr>位置在最上面的块之下。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>For this task, state-</nobr></span> <nobr>为了这个任务，</nobr></span> </div><div style="position:absolute;top:5847;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>space constraints such as obstacles were taken into account</nobr></span> <nobr>考虑到诸如障碍物等空间限制</nobr></span> </div><div style="position:absolute;top:5865;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>during planning, see Sec.</nobr></span> <nobr>在规划过程中，请参见</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#4">IV-D</a> .</span> <a href="#4">IV-D</a> 。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">Videos can be found at</span>视频可以在</span> </div><div style="position:absolute;top:5882;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="https://translate.google.com/translate?hl=zh-CN&amp;prev=_t&amp;sl=en&amp;tl=zh-CN&amp;u=http://www.cs.uw.edu/ai/Mobile_Robotics/projects/robot-rl">http://www.cs.uw.edu/ai/Mobile Robotics/projects/robot-rl.</a></span> <a 
href="https://translate.google.com/translate?hl=zh-CN&amp;prev=_t&amp;sl=en&amp;tl=zh-CN&amp;u=http://www.cs.uw.edu/ai/Mobile_Robotics/projects/robot-rl">http://www.cs.uw.edu/ai/Mobile Robotics / projects / robot-rl。</a></span> </div><div style="position:absolute;top:5921;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>A. Independent Controllers for Building a Tower</nobr></span> <nobr>A.建立塔的独立控制者</nobr></span> </div><div style="position:absolute;top:5948;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>We split the task of building a tower into learning individual</nobr></span> <nobr>我们把建塔的任务分解成学习个人</nobr></span> </div><div style="position:absolute;top:5966;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>controllers for each target block B2–B6 (bottom to top)</nobr></span> <nobr>每个目标块B2-B6的控制器（从下到上）</nobr></span> </div><div style="position:absolute;top:5984;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>starting from the same initial configuration, in which the robot</nobr></span> <nobr>从相同的初始配置开始，其中机器人</nobr></span> </div><div style="position:absolute;top:6002;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>arm was upright, see Fig.</nobr> <a href="#5">4.</a></span> <nobr>手臂直立，见图</nobr> <a href="#5">4。</a></span> </div><div style="position:absolute;top:5371;left:228"><a href="#4" style="background-color:#0000ff;padding:22px 18px;"></a></div><div 
style="position:absolute;top:6115;left:0"><hr><table border="0" width="100%"><tbody><tr><td bgcolor="eeeeee" align="right"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><font face="arial,sans-serif"><a name="6"><b>Page 6</b></a></font></span> <font face="arial,sans-serif"><a name="6"><b>第6页</b></a></font></span> </td></tr></tbody></table></div><div style="position:absolute;top:6326;left:91"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>1</nobr></span> <nobr>1</nobr></span> </div><div style="position:absolute;top:6326;left:109"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>2</nobr></span> <nobr>2</nobr></span> </div><div style="position:absolute;top:6326;left:126"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>3</nobr></span> <nobr>3</nobr></span> </div><div style="position:absolute;top:6326;left:144"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>4</nobr></span> <nobr>4</nobr></span> </div><div style="position:absolute;top:6326;left:161"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>5</nobr></span> <nobr>5</nobr></span> </div><div style="position:absolute;top:6326;left:179"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>6</nobr></span> <nobr>6</nobr></span> </div><div style="position:absolute;top:6326;left:196"> <span 
class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>7</nobr></span> <nobr>7</nobr></span> </div><div style="position:absolute;top:6326;left:214"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>8</nobr></span> <nobr>8</nobr></span> </div><div style="position:absolute;top:6326;left:231"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>9</nobr></span> <nobr>9</nobr></span> </div><div style="position:absolute;top:6326;left:247"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>10</nobr></span> <nobr>10</nobr></span> </div><div style="position:absolute;top:6323;left:89"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0</nobr></span> <nobr>0</nobr></span> </div><div style="position:absolute;top:6306;left:89"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>5</nobr></span> <nobr>5</nobr></span> </div><div style="position:absolute;top:6289;left:86"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>10</nobr></span> <nobr>10</nobr></span> </div><div style="position:absolute;top:6272;left:86"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>15</nobr></span> <nobr>15</nobr></span> </div><div style="position:absolute;top:6256;left:86"> <span class="notranslate" 
onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>20</nobr></span> <nobr>20</nobr></span> </div><div style="position:absolute;top:6239;left:86"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>25</nobr></span> <nobr>25</nobr></span> </div><div style="position:absolute;top:6222;left:86"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>30</nobr></span> <nobr>30</nobr></span> </div><div style="position:absolute;top:6331;left:153"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>training iteration</nobr></span> <nobr>训练迭代</nobr></span> </div><div style="position:absolute;top:6304;left:84"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>average distance to target (in cm)</nobr></span> <nobr>到目标的平均距离（cm）</nobr></span> </div><div style="position:absolute;top:6347;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(a) Typical learning curve as a func-</nobr></span> <nobr>（a）典型的学习曲线，作为</nobr></span> </div><div style="position:absolute;top:6360;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>tion of training iterations.</nobr></span> <nobr>训练迭代次数的函数。</nobr></span> </div><div style="position:absolute;top:6324;left:306"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: 
left"><nobr>−0.2</nobr></span> <nobr>-0.2</nobr></span> </div><div style="position:absolute;top:6324;left:327"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−0.1</nobr></span> <nobr>-0.1</nobr></span> </div><div style="position:absolute;top:6324;left:354"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0</nobr></span> <nobr>0</nobr></span> </div><div style="position:absolute;top:6324;left:373"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0.1</nobr></span> <nobr>0.1</nobr></span> </div><div style="position:absolute;top:6324;left:394"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0.2</nobr></span> <nobr>0.2</nobr></span> </div><div style="position:absolute;top:6324;left:415"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0.3</nobr></span> <nobr>0.3</nobr></span> </div><div style="position:absolute;top:6226;left:279"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−0.2</nobr></span> <nobr>-0.2</nobr></span> </div><div style="position:absolute;top:6245;left:279"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−0.1</nobr></span> <nobr>-0.1</nobr></span> </div><div style="position:absolute;top:6263;left:289"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; 
text-align: left"><nobr>0</nobr></span> <nobr>0</nobr></span> </div><div style="position:absolute;top:6282;left:283"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0.1</nobr></span> <nobr>0.1</nobr></span> </div><div style="position:absolute;top:6301;left:283"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0.2</nobr></span> <nobr>0.2</nobr></span> </div><div style="position:absolute;top:6320;left:283"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0.3</nobr></span> <nobr>0.3</nobr></span> </div><div style="position:absolute;top:6330;left:325"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>x−dist.</nobr></span> <nobr>x方向</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>to target (in m)</nobr></span> <nobr>到目标的距离（米）</nobr></span> </div><div style="position:absolute;top:6292;left:277"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>y−dist.</nobr></span> <nobr>y方向</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>to target (in m)</nobr></span> <nobr>到目标的距离（米）</nobr></span> </div><div style="position:absolute;top:6310;left:431"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−0.4</nobr></span> 
<nobr>-0.4</nobr></span> </div><div style="position:absolute;top:6297;left:431"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−0.2</nobr></span> <nobr>-0.2</nobr></span> </div><div style="position:absolute;top:6285;left:431"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0</nobr></span> <nobr>0</nobr></span> </div><div style="position:absolute;top:6272;left:431"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0.2</nobr></span> <nobr>0.2</nobr></span> </div><div style="position:absolute;top:6259;left:431"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0.4</nobr></span> <nobr>0.4</nobr></span> </div><div style="position:absolute;top:6247;left:431"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0.6</nobr></span> <nobr>0.6</nobr></span> </div><div style="position:absolute;top:6234;left:431"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>0.8</nobr></span> <nobr>0.8</nobr></span> </div><div style="position:absolute;top:6221;left:431"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>1</nobr></span> <nobr>1</nobr></span> </div><div style="position:absolute;top:6209;left:431"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: 
left"><nobr>1.2</nobr></span> <nobr>1.2</nobr></span> </div><div style="position:absolute;top:6347;left:264"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(b) Two-dimensional slice through the</nobr></span> <nobr>（二）通过二维切片</nobr></span> </div><div style="position:absolute;top:6360;left:264"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>cost function with obstacles encoded.</nobr></span> <nobr>成本函数与障碍编码。</nobr></span> </div><div style="position:absolute;top:6397;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Fig. 5. (</nobr> <a href="#6">a)</a> Typical learning curve.</span> <nobr>图5.（</nobr> <a href="#6">a）</a>典型的学习曲线。</span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">The horizontal axis shows the learning</span>横轴表示学习</span> </div><div style="position:absolute;top:6411;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>stage, the vertical axis shows the average distance to the target at time T</nobr></span> <nobr>阶段，纵轴表示在时间T到目标的平均距离</nobr></span> </div><div style="position:absolute;top:6424;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>(with 95% standard error).</nobr></span> <nobr>（具有95％的标准误差）。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#6">(b)</a> Two-dimensional slice through the cost 
function</span> <a href="#6">（b）</a>通过成本函数的二维切片</span> </div><div style="position:absolute;top:6438;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>defined in task space with observed end effector trajectory.</nobr></span> <nobr>在任务空间中用观察到的末端执行器轨迹定义。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The z-coordinate</nobr></span> <nobr>z坐标</nobr></span> </div><div style="position:absolute;top:6451;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>is set to be at the target.</nobr></span> <nobr>被设为目标处的值。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Red and blue colors indicate high and low costs,</nobr></span> <nobr>红色和蓝色表示高成本和低成本，</nobr></span> </div><div style="position:absolute;top:6465;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>respectively.</nobr></span> <nobr>二者依次对应。</nobr></span> </div><div style="position:absolute;top:6510;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>All independently trained controllers shared the same initial</nobr></span> <nobr>所有独立训练的控制器共享相同的初始</nobr></span> </div><div style="position:absolute;top:6528;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>trial.</nobr></span> <nobr>试验。</nobr></span> <span class="notranslate" 
onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>A total of ten learning-interacting iterations (including</nobr></span> <nobr>总共十个学习交互迭代（包括</nobr></span> </div><div style="position:absolute;top:6546;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>the random initial trial) generally sufficed to learn both good</nobr></span> <nobr>随机初始试验）一般都足以学会两者的好处</nobr></span> </div><div style="position:absolute;top:6563;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>dynamics models and good controllers.</nobr></span> <nobr>动力学模型和好的控制器。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Fig.</nobr> <a href="#6">5(a</a> ) shows a</span> <nobr>图</nobr> <a href="#6">5（a</a> ）显示了一个</span> </div><div style="position:absolute;top:6581;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>learning curve for a typical training session (averaged over</nobr></span> <nobr>学习曲线为典型的训练课程（平均）</nobr></span> </div><div style="position:absolute;top:6599;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>ten test runs after each learning stage and all blocks B2–B6).</nobr></span> <nobr>在每个学习阶段之后进行10次测试并且所有块B2-B6）。</nobr></span> </div><div style="position:absolute;top:6617;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Learning noticeably kicked in 
after about four iterations.</nobr></span> <nobr>在大约四次迭代之后，学习效果开始明显显现。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>After</nobr></span> <nobr>经过</nobr></span> </div><div style="position:absolute;top:6635;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>10 learning iterations, the block in the gripper was expected to</nobr></span> <nobr>10次学习迭代后，夹爪中的块预计会</nobr></span> </div><div style="position:absolute;top:6653;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>be very close (approximately at noise level) to the target.</nobr></span> <nobr>非常接近目标（大约在噪声水平）。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The</nobr></span> <nobr>该</nobr></span> </div><div style="position:absolute;top:6671;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>required interaction time sums up to only 50 s per controller</nobr></span> <nobr>所需的交互时间总计仅为每个控制器50 s</nobr></span> </div><div style="position:absolute;top:6689;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>and 230 s in total (the initial random trial is counted only</nobr></span> <nobr>总共230s（最初的随机试验只计算在内</nobr></span> </div><div style="position:absolute;top:6707;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>once).</nobr></span> 
<nobr>一次）。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>This speed of learning is very difficult to achieve by</nobr></span> <nobr>这样的学习速度很难被</nobr></span> </div><div style="position:absolute;top:6725;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>other RL methods that learn from scratch [</nobr> <a href="#8">7]</a> .</span> <nobr>其他从零开始学习的RL方法达到[</nobr> <a href="#8">7]</a> 。</span> </div><div style="position:absolute;top:6743;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>A standard myopic task-space control method such as</nobr></span> <nobr>一个标准的短视（myopic）任务空间控制方法，如</nobr></span> </div><div style="position:absolute;top:6761;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Jacobian-transpose control</nobr> <a href="#8">[</a> 13] (using the GP dynamics</span> <nobr>雅可比转置控制</nobr> <a href="#8">[</a> 13]（使用GP动力学</span> </div><div style="position:absolute;top:6779;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>model) could solve the problem, too, without any planning.</nobr></span> <nobr>模型）也可以解决该问题，而无需任何规划。</nobr></span> </div><div style="position:absolute;top:6797;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>However, this approach benefits from a good dynamics model</nobr></span> <nobr>但是，这种方法受益于良好的动力学模型</nobr></span> </div><div style="position:absolute;top:6815;left:73"> <span 
class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>along the desired trajectory in task space.</nobr></span> <nobr>沿任务空间的期望的轨迹。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Obtaining this model</nobr></span> <nobr>获得这个模型</nobr></span> </div><div style="position:absolute;top:6833;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>through motor babbling can be data inefficient.</nobr></span> <nobr>若通过运动咿呀学语（motor babbling）进行，数据效率可能很低。</nobr></span> </div><div style="position:absolute;top:6862;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>B. Sequential Transfer Learning</nobr></span> <nobr>B.顺序迁移学习</nobr></span> </div><div style="position:absolute;top:6885;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>We now evaluate how much we can speed up learning by</nobr></span> <nobr>我们现在评估通过以下方式能在多大程度上加快学习：</nobr></span> </div><div style="position:absolute;top:6903;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>transferring knowledge.</nobr></span> <nobr>迁移知识。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>To do so, we exploited the sequential</nobr></span> <nobr>为此，我们利用顺序</nobr></span> </div><div style="position:absolute;top:6921;left:73"> <span class="notranslate" onmouseover="_tipon(this)" 
onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>nature of the block-stacking task.</nobr></span> <nobr>块堆叠任务的性质。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In Sec.</nobr></span> <nobr>在第二部分</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#5">VA,</a> we trained five</span> <a href="#5">VA，</a>我们训练了五场</span> </div><div style="position:absolute;top:6939;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>independent controllers for the five different blocks B2–B6.</nobr></span> <nobr>独立的控制器为五个不同的块B2-B6。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>In</nobr></span> <nobr>在</nobr></span> </div><div style="position:absolute;top:6957;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>the following, we report results for training the first controller</nobr></span> <nobr>以下，我们报告第一个控制器的培训结果</nobr></span> </div><div style="position:absolute;top:6975;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>for the bottom block B2 as earlier.</nobr></span> <nobr>对于底部块B2，如前所述。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Subsequently, however,</nobr></span> <nobr>然而，随后，</nobr></span> </div><div 
style="position:absolute;top:6993;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>we reused both the dynamics model and the controller pa-</nobr></span> <nobr>我们重复使用了动力学模型和控制器参数，</nobr></span> </div><div style="position:absolute;top:7011;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>rameters when learning the controller for the next block.</nobr></span> <nobr>用于学习下一个块的控制器。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>This</nobr></span> <nobr>这种</nobr></span> </div><div style="position:absolute;top:7029;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>initialization of the learning process was more informed than</nobr></span> <nobr>学习过程的初始化方式比</nobr></span> </div><div style="position:absolute;top:7046;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>a random one and gave the learner a head-start: Learning to</nobr></span> <nobr>随机初始化信息更充分，让学习者赢得先机：学习</nobr></span> </div><div style="position:absolute;top:7064;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>stack a new block on the topmost one requires a sufficiently</nobr></span> <nobr>在最上面的一个堆叠一个新的块需要足够的</nobr></span> </div><div style="position:absolute;top:7082;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>good 
dynamics model in similar parts of the state space.</nobr></span> <nobr>好的、在状态空间相似区域内的动力学模型。</nobr></span> </div><div style="position:absolute;top:7100;left:88"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Tab.</nobr></span> <nobr>表</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#6">I</a> summarizes the gains through this kind of transfer</span> <a href="#6">I</a>总结了通过这种迁移</span> </div><div style="position:absolute;top:7118;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>learning.</nobr></span> <nobr>学习获得的收益。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Learning to stack a block of six blocks (the base</nobr></span> <nobr>学习堆叠六个块（底座</nobr></span> </div><div style="position:absolute;top:7136;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>B1 is given) required only 90 s of experience when</nobr> <font style="font-size:9px">PILCO</font></span> <nobr>B1已给定）只需要90秒的</nobr> <font style="font-size:9px">PILCO</font> <nobr>经验</nobr></span> </div><div style="position:absolute;top:7154;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>exploited the sequential nature of this task, compared to 230 s</nobr></span> <nobr>利用这个任务的连续性，相比之下，230秒</nobr></span> </div><div style="position:absolute;top:7172;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span 
class="google-src-text" style="direction: ltr; text-align: left"><nobr>when five controllers were learned independently from scratch,</nobr></span> <nobr>当五个控制器从头开始独立学习时，</nobr></span> </div><div style="position:absolute;top:7190;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>see Sec.</nobr></span> <nobr>见第二节。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#5">VA.</a></span> <a href="#5">VA。</a></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left">In other words, the amount of data required for</span>换句话说，所需的数据量</span> </div><div style="position:absolute;top:6196;left:633"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>TABLE I</nobr></span> <nobr>表一</nobr></span> </div><div style="position:absolute;top:6209;left:554"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>T</nobr> <font style="font-size:7px">RANSFER LEARNING GAINS</font> ( <font style="font-size:7px">SETUP</font> 1).</span> <nobr>T</nobr> <font style="font-size:7px">RANSFER学习增益</font> （ <font style="font-size:7px">设置</font> 1）。</span> </div><div style="position:absolute;top:6238;left:643"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>B2</nobr></span> <nobr>B2</nobr></span> </div><div style="position:absolute;top:6238;left:676"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; 
text-align: left"><nobr>B2–B3</nobr></span> <nobr>B2-B3</nobr></span> </div><div style="position:absolute;top:6238;left:719"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>B2–B4</nobr></span> <nobr>B2-B4</nobr></span> </div><div style="position:absolute;top:6238;left:764"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>B2–B5</nobr></span> <nobr>B2-B5</nobr></span> </div><div style="position:absolute;top:6238;left:809"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>B2–B6</nobr></span> <nobr>B2-B6</nobr></span> </div><div style="position:absolute;top:6249;left:478"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>trials (seconds) independent controllers</nobr></span> <nobr>试验次数（秒）独立控制器</nobr></span> </div><div style="position:absolute;top:6249;left:635"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>10 (50)</nobr></span> <nobr>10（50）</nobr></span> </div><div style="position:absolute;top:6249;left:676"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>19 (95)</nobr></span> <nobr>19（95）</nobr></span> </div><div style="position:absolute;top:6249;left:716"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>28 (140)</nobr></span> <nobr>28（140）</nobr></span> </div><div style="position:absolute;top:6249;left:761"> <span class="notranslate" 
onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>37 (185)</nobr></span> <nobr>37（185）</nobr></span> </div><div style="position:absolute;top:6249;left:806"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>46 (230)</nobr></span> <nobr>46（230）</nobr></span> </div><div style="position:absolute;top:6259;left:478"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>trials (seconds) sequential controllers</nobr></span> <nobr>试用（秒）顺序控制器</nobr></span> </div><div style="position:absolute;top:6259;left:635"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>10 (50)</nobr></span> <nobr>10（50）</nobr></span> </div><div style="position:absolute;top:6259;left:676"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>12 (60)</nobr></span> <nobr>12（60）</nobr></span> </div><div style="position:absolute;top:6259;left:718"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>14 (70)</nobr></span> <nobr>14（70）</nobr></span> </div><div style="position:absolute;top:6259;left:763"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>16 (80)</nobr></span> <nobr>16（80）</nobr></span> </div><div style="position:absolute;top:6259;left:808"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>18 (90)</nobr></span> 
<nobr>18（90）</nobr></span> </div><div style="position:absolute;top:6270;left:478"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>speedup (independent/sequential)</nobr></span> <nobr>加速（独立/顺序）</nobr></span> </div><div style="position:absolute;top:6270;left:646"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>1</nobr></span> <nobr>1</nobr></span> </div><div style="position:absolute;top:6270;left:681"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>1.58</nobr></span> <nobr>1.58</nobr></span> </div><div style="position:absolute;top:6270;left:730"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>2</nobr></span> <nobr>2</nobr></span> </div><div style="position:absolute;top:6270;left:769"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>2.31</nobr></span> <nobr>2.31</nobr></span> </div><div style="position:absolute;top:6270;left:814"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>2.56</nobr></span> <nobr>2.56</nobr></span> </div><div style="position:absolute;top:6292;left:631"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>TABLE II</nobr></span> <nobr>表二</nobr></span> </div><div style="position:absolute;top:6306;left:488"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" 
style="direction: ltr; text-align: left"><nobr>A</nobr> <font style="font-size:7px">VERAGE BLOCK DEPOSIT SUCCESS IN</font> 10 <font style="font-size:7px">TEST TRIALS AND FOUR</font></span> <nobr>一个</nobr> <font style="font-size:7px">VERAGE块矿成功</font> 10个<font style="font-size:7px">测试试验和四</font></span> </div><div style="position:absolute;top:6321;left:497"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>DIFFERENT</nobr> <font style="font-size:9px">(</font> RANDOM <font style="font-size:9px">)</font> LEARNING INITIALIZATIONS <font style="font-size:9px">(</font> SETUP <font style="font-size:9px">1).</font></span> <nobr>不同</nobr> <font style="font-size:9px">（</font>随机<font style="font-size:9px">）</font>学习的初始化<font style="font-size:9px">（</font>设置<font style="font-size:9px">1）。</font></span> </div><div style="position:absolute;top:6349;left:620"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>B2</nobr></span> <nobr>B2</nobr></span> </div><div style="position:absolute;top:6349;left:671"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>B3</nobr></span> <nobr>B3</nobr></span> </div><div style="position:absolute;top:6349;left:722"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>B4</nobr></span> <nobr>B4</nobr></span> </div><div style="position:absolute;top:6349;left:768"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>B5</nobr></span> <nobr>B5</nobr></span> </div><div style="position:absolute;top:6349;left:812"> <span class="notranslate" 
onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>B6</nobr></span> <nobr>B6</nobr></span> </div><div style="position:absolute;top:6363;left:478"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>independent controllers</nobr></span> <nobr>独立控制器</nobr></span> </div><div style="position:absolute;top:6362;left:610"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>92.5%</nobr></span> <nobr>92.5％</nobr></span> </div><div style="position:absolute;top:6362;left:666"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>80%</nobr></span> <nobr>80％</nobr></span> </div><div style="position:absolute;top:6362;left:712"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>42.5%</nobr></span> <nobr>42.5％</nobr></span> </div><div style="position:absolute;top:6362;left:763"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>96%</nobr></span> <nobr>96％</nobr></span> </div><div style="position:absolute;top:6362;left:805"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>100%</nobr></span> <nobr>100％</nobr></span> </div><div style="position:absolute;top:6376;left:478"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>sequential controllers</nobr></span> <nobr>顺序控制器</nobr></span> </div><div 
style="position:absolute;top:6376;left:610"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>92.5%</nobr></span> <nobr>92.5％</nobr></span> </div><div style="position:absolute;top:6376;left:661"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>87.5%</nobr></span> <nobr>87.5％</nobr></span> </div><div style="position:absolute;top:6376;left:712"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>82.5%</nobr></span> <nobr>82.5％</nobr></span> </div><div style="position:absolute;top:6376;left:763"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>95%</nobr></span> <nobr>95％</nobr></span> </div><div style="position:absolute;top:6376;left:808"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>95%</nobr></span> <nobr>95％</nobr></span> </div><div style="position:absolute;top:6422;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>learning independent controllers for B2–B3 was sufficient to</nobr></span> <nobr>学习B2-B3的独立控制器就足够了</nobr></span> </div><div style="position:absolute;top:6440;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>learn stacking the entire tower of six blocks when knowledge</nobr></span> <nobr>学习知识的时候，学习整个六个街区的塔</nobr></span> </div><div style="position:absolute;top:6458;left:468"> <span class="notranslate" 
onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>was transferred.</nobr></span> <nobr>被转移。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Sequential controller learning required only</nobr></span> <nobr>仅需要顺序控制器学习</nobr></span> </div><div style="position:absolute;top:6476;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>two additional trials per block to achieve a performance similar</nobr></span> <nobr>两个额外的试验每块实现类似的性能</nobr></span> </div><div style="position:absolute;top:6494;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>or better to a corresponding controller learned independent of</nobr></span> <nobr>或者更好地向相应的控制器学习独立</nobr></span> </div><div style="position:absolute;top:6512;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>all other controllers, see Tab.</nobr></span> <nobr>所有其他控制器，请参见Tab。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#6">II.</a></span> <a href="#6">II。</a></span> </div><div style="position:absolute;top:6530;left:483"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Tab.</nobr></span> <nobr>标签。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#6">II</a> reports the 
rates for successfully depositing the block</span> <a href="#6">II</a>报告了成功放置方块的比率</span> </div><div style="position:absolute;top:6548;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>on the top of the current foam tower in 10 test trials and four</nobr></span> <nobr>在目前的泡沫塔顶部进行了10次试验和4次</nobr></span> </div><div style="position:absolute;top:6566;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>different learning initializations.</nobr></span> <nobr>不同的学习初始化。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Failures were largely caused</nobr></span> <nobr>失败主要是</nobr></span> </div><div style="position:absolute;top:6584;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>by the foam block bumping off the topmost block.</nobr></span> <nobr>由泡沫块撞掉最上面的方块造成的。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>The table</nobr></span> <nobr>该表</nobr></span> </div><div style="position:absolute;top:6602;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>indicates that sequentially trained controllers perform at least</nobr></span> <nobr>表示顺序训练的控制器至少执行</nobr></span> </div><div style="position:absolute;top:6619;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">还有独立了解到控制器，尽管</font></font></nobr> </div><div style="position:absolute;top:6637;left:468"> <nobr><font 
style="vertical-align: inherit;"><font style="vertical-align: inherit;">事实上，他们用较少的基本训练迭代，见</font></font></nobr> </div><div style="position:absolute;top:6655;left:468"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Tab.</nobr></span> <nobr>表</nobr></span> <a href="#6"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">I</font></font></a><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">。</font><font style="vertical-align: inherit;">在四个学习设置之一，10次迭代学习</font></font></div><div style="position:absolute;top:6673;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">并不足以学习的好（独立）控制器B4，</font></font></nobr> </div><div style="position:absolute;top:6691;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">这就是相应的平均放置成功率较差的原因，</font></font></nobr> </div><div style="position:absolute;top:6709;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">见表</font></font></nobr> <a href="#6"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">II</font></font></a><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">。</font><font style="vertical-align: inherit;">相应的顺序控制器</font></font></div><div style="position:absolute;top:6727;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">利用动力学模型的初始化信息</font></font></nobr> </div><div style="position:absolute;top:6745;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">从B3并没有从这种失败的痛苦。</font></font></nobr> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Although</nobr></span> <nobr>虽然</nobr></span> </div><div 
style="position:absolute;top:6763;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">存款失败的反馈是不提供给学习者时，</font></font></nobr> </div><div style="position:absolute;top:6781;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">存款成功跨越10个测试试验和四种不同的好</font></font></nobr> </div><div style="position:absolute;top:6799;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">培训设置。</font><font style="vertical-align: inherit;">注意与山猫的是高精度的控制</font></font></nobr> </div><div style="position:absolute;top:6817;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">手臂是困难的，因为手臂可以很生涩。</font></font></nobr> </div><div style="position:absolute;top:6835;left:483"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">通过动力学模型知识转移是非常</font></font></nobr> </div><div style="position:absolute;top:6853;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">有价值。</font><font style="vertical-align: inherit;">另外传送控制器参数ψ</font></font></nobr> </div><div style="position:absolute;top:6871;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">在顺序设置不是决定性的：重设控制器</font></font></nobr> </div><div style="position:absolute;top:6888;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">参数为零，再训练会导致非常相似的结果。</font></font></nobr> </div><div style="position:absolute;top:6906;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">一个“信息”控制器初始化没有动态</font></font></nobr> </div><div style="position:absolute;top:6924;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">模型将不利于学习，因为控制器param-</font></font></nobr> </div><div style="position:absolute;top:6942;left:468"> <nobr><font style="vertical-align: inherit;"><font 
style="vertical-align: inherit;">ETERS在当前动力学模型的光教训。</font></font></nobr> </div><div style="position:absolute;top:6970;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">C.防撞</font></font></nobr> </div><div style="position:absolute;top:6993;left:483"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">如果对环境的先验知识是已知的，它是</font></font></nobr> </div><div style="position:absolute;top:7011;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">有助于纳入规划这一点; </font><font style="vertical-align: inherit;">至少为了</font></font></nobr> </div><div style="position:absolute;top:7029;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">延长机器人的续航时间。</font><font style="vertical-align: inherit;">迄今，我们假定</font></font></nobr> </div><div style="position:absolute;top:7047;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">学习系统完全不知情的有关环境。</font></font></nobr> </div><div style="position:absolute;top:7065;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">这意味着，当有障碍，也就是说，一个表中，</font></font></nobr> </div><div style="position:absolute;top:7083;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">机器人了解了表如下：当机器人手臂</font></font></nobr> </div><div style="position:absolute;top:7100;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">拍着桌子，块的预测轨迹</font></font></nobr> </div><div style="position:absolute;top:7118;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">抓手和观察的轨迹不匹配。</font></font></nobr> </div><div style="position:absolute;top:7136;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">在随后的试验中，当GP动力学模型占了</font></font></nobr> </div><div style="position:absolute;top:7154;left:468"> <nobr><font 
style="vertical-align: inherit;"><font style="vertical-align: inherit;">为了这个经验，机器人发现了一个更好的轨迹</font></font></nobr> </div><div style="position:absolute;top:7172;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">没有卡在桌子的表面上。</font></font></nobr> </div><div style="position:absolute;top:7190;left:483"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">在下文中，我们考虑修改专有名词，</font></font></nobr> </div><div style="position:absolute;top:7303;left:0"><hr><table border="0" width="100%"><tbody><tr><td bgcolor="eeeeee" align="right"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><font face="arial,sans-serif"><a name="7"><b>Page 7</b></a></font></span> <font face="arial,sans-serif"><a name="7"><b>第7页</b></a></font></span> </td></tr></tbody></table></div><div style="position:absolute;top:7430;left:151"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>target</nobr></span> <nobr>目标</nobr></span> </div><div style="position:absolute;top:7475;left:313"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">初始状态</font></font></nobr> </div><div style="position:absolute;top:7576;left:85"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">图6.学习设置2：初始位置在塔顶以下。</font></font></nobr> </div><div style="position:absolute;top:7623;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">意识形态：环境中的障碍是明确的</font></font></nobr> </div><div style="position:absolute;top:7641;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">纳入计划，见第二节</font></font></nobr> <a href="#4"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">。</font><font style="vertical-align: 
inherit;">IV-D</font></font></a><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">当机器人时</font></font></div><div style="position:absolute;top:7658;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">应该学习建设一个块塔，最初的地方</font></font></nobr> </div><div style="position:absolute;top:7676;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">状态低于目标状态，见图</font></font></nobr> <a href="#7"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">6。</font></font></a> </div><div style="position:absolute;top:7695;left:88"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">由于预先不知道所需的轨迹，</font></font></nobr> </div><div style="position:absolute;top:7713;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">雅可比转置控制将导致之间的碰撞</font></font></nobr> </div><div style="position:absolute;top:7731;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">在末端执行器块和塔的最顶部块。</font></font></nobr> </div><div style="position:absolute;top:7749;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">由于控制维度R </font></font></nobr> <font style="font-size:8px"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">4</font></font></font><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">大于任务空间</font></font></div><div style="position:absolute;top:7767;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">维度R </font></font></nobr> <font style="font-size:8px"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">3</font></font></font><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">，线性策略π：R </font></font><font style="font-size:8px"><font style="vertical-align: inherit;"><font 
style="vertical-align: inherit;">3</font></font></font><font style="vertical-align: inherit;"><font style="vertical-align: inherit;"> →R </font></font><font style="font-size:8px"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">4</font></font></font><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">仍然足够</font></font></div><div style="position:absolute;top:7785;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">解决这个问题，这在任务空间是非线性的。</font></font></nobr> </div><div style="position:absolute;top:7803;left:88"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">在考虑的实验设置中，我们模拟了塔</font></font></nobr> </div><div style="position:absolute;top:7821;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">作为一组块。</font><font style="vertical-align: inherit;">继Sec </font></font></nobr> <a href="#4"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">。</font><font style="vertical-align: inherit;">IV-</font></font></a><font style="vertical-align: inherit;"><font style="vertical-align: inherit;"> D，我们添加了一个高斯 -</font></font></div><div style="position:absolute;top:7839;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">每块的形状惩罚：平均值μ</font></font></nobr> </div><div style="position:absolute;top:7835;left:348"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−</nobr></span> <nobr>-</nobr></span> </div><div style="position:absolute;top:7846;left:348"> <nobr><font style="vertical-align: inherit;"></font></nobr> <font style="vertical-align: inherit;"><font style="font-size:12px"><font style="vertical-align: inherit;">惩罚的</font></font><nobr><font style="vertical-align: inherit;">j</font></nobr></font><font style="font-size:12px"><font style="vertical-align: 
inherit;"></font></font> </div><div style="position:absolute;top:7857;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">是块的中心，协方差设置</font></font></nobr> </div><div style="position:absolute;top:7875;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">到Σ</font></font></nobr> </div><div style="position:absolute;top:7871;left:105"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>−</nobr></span> <nobr>-</nobr></span> </div><div style="position:absolute;top:7882;left:105"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>j</nobr></span> <nobr>j</nobr></span> </div><div style="position:absolute;top:7875;left:125"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">=（</font></font></nobr> <font style="font-size:8px"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">3</font></font></font> </div><div style="position:absolute;top:7883;left:153"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>4</nobr></span> <nobr>4</nobr></span> </div><div style="position:absolute;top:7875;left:161"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">）</font></font></nobr> <font style="font-size:8px"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">2</font></font></font><font style="vertical-align: inherit;"><font style="vertical-align: inherit;"> I.定义任务空间中的障碍可以是</font></font></div><div style="position:absolute;top:7893;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">自动化使用3D物体检测算法和混合 - </font></font></nobr> </div><div 
style="position:absolute;top:7911;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">Gaussians聚类。</font><font style="vertical-align: inherit;">图</font></font></nobr> <a href="#6"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">5（b）</font></font></a><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">显示最有趣的</font></font></div><div style="position:absolute;top:7929;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">在任务空间成本函数的二维切片各地</font></font></nobr> </div><div style="position:absolute;top:7947;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">目标状态。</font><font style="vertical-align: inherit;">第三坐标被假定为在</font></font></nobr> </div><div style="position:absolute;top:7964;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>target.</nobr></span> <nobr>目标。</nobr></span> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">到塔障碍由于刑罚是通过定义</font></font></nobr> </div><div style="position:absolute;top:7982;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">高成本地区。</font><font style="vertical-align: inherit;">请注意，以最低的成本不会发生</font></font></nobr> </div><div style="position:absolute;top:8000;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">正是在目标，但塔略高于。</font><font style="vertical-align: inherit;">在图</font></font></nobr> <a href="#6"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">5（b）中，</font></font></a> </div><div style="position:absolute;top:8018;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">椭圆是初始状态的二维投影</font></font></nobr> </div><div style="position:absolute;top:8036;left:73"> <nobr><font 
style="vertical-align: inherit;"><font style="vertical-align: inherit;">分布p（X </font></font></nobr> <font style="font-size:8px"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">0</font></font></font><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">），虚线是一个观察到的轨迹</font></font></div><div style="position:absolute;top:8054;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">当施加所述控制器。</font><font style="vertical-align: inherit;">可以看出如何机器人</font></font></nobr> </div><div style="position:absolute;top:8072;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">臂避免了塔在堆栈的顶部沉积块。</font></font></nobr> </div><div style="position:absolute;top:8091;left:88"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">为了评估我们的方法的有效性，在碰撞</font></font></nobr> </div><div style="position:absolute;top:8109;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">回避，</font></font></nobr> <font style="font-size:9px"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">PILCO</font></font></font><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">了解到五个独立的控制器，用于</font></font></div><div style="position:absolute;top:8126;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">建筑泡沫块的塔基础上规划或者</font></font></nobr> </div><div style="position:absolute;top:8144;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">具有或不具有约束。</font><font style="vertical-align: inherit;">所有控制器共享相同的</font></font></nobr> </div><div style="position:absolute;top:8162;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">单个随机试验和10（控制）训练的推出。</font></font></nobr> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span 
class="google-src-text" style="direction: ltr; text-align: left"><nobr>This</nobr></span> <nobr>这个</nobr></span> </div><div style="position:absolute;top:8180;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">相当于每控制器55个s的总体验。</font></font></nobr> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Tab.</nobr></span> <nobr>标签。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#8">III</a></span> <a href="#8">III</a></span> </div><div style="position:absolute;top:8198;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">总结了这些设置的结果（平均超过四</font></font></nobr> </div><div style="position:absolute;top:8216;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">不同的随机学习初始化）以下下</font></font></nobr> </div><div style="position:absolute;top:8234;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">方面：碰撞避免的有效性，块存</font></font></nobr> </div><div style="position:absolute;top:8252;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">成功率，并控制质量。</font></font></nobr> </div><div style="position:absolute;top:8271;left:88"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">首先，我们研究了碰撞的有效性avoid-</font></font></nobr> </div><div style="position:absolute;top:8288;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">ANCE。</font><font style="vertical-align: inherit;">我们定义的“碰撞”发生机器人手臂时</font></font></nobr> </div><div style="position:absolute;top:8306;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">相撞泡沫块的塔。</font></font></nobr> <span class="notranslate" 
onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Tab.</nobr></span> <nobr>标签。</nobr></span> <a href="#8"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">III</font></font></a><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">表示</font></font></div><div style="position:absolute;top:8324;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">与国家空间限制计划导致较少的冲突</font></font></nobr> </div><div style="position:absolute;top:8342;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">比无关训练的障碍。</font><font style="vertical-align: inherit;">需要注意的是数量 - </font></font></nobr> </div><div style="position:absolute;top:8360;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">BER的选项卡内。</font></font></nobr> <a href="#8"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">三</font></font></a><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">是在碰撞过程中的训练，而不是在</font></font></div><div style="position:absolute;top:8378;left:73"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>testing.</nobr></span> <nobr>测试。</nobr></span> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">这意味着即使在学习的早期阶段</font></font></nobr> </div><div style="position:absolute;top:7392;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">（当动态模型非常不确定时），</font></font></nobr> <font style="font-size:9px"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">PILCO</font></font></font><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">了解到</font></font></div><div 
style="position:absolute;top:7410;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">一个“谨慎”的控制器，以避免碰撞。</font></font></nobr> </div><div style="position:absolute;top:7427;left:483"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">二，Tab。</font></font></nobr> <a href="#8"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">III</font></font></a><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">报告的块存款成功率为</font></font></div><div style="position:absolute;top:7445;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">之后的10次测试运行（以及四次不同的训练初始化）</font></font></nobr> </div><div style="position:absolute;top:7463;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">10次​​训练迭代。</font><font style="vertical-align: inherit;">在这里，我们看到，</font></font></nobr> </div><div style="position:absolute;top:7481;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">空间限制导致成功率大幅提高</font></font></nobr> </div><div style="position:absolute;top:7499;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">在存款块。</font><font style="vertical-align: inherit;">规划没有国家空间限制</font></font></nobr> </div><div style="position:absolute;top:7517;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">经常导致一个控制器略微触击最高的块</font></font></nobr> </div><div style="position:absolute;top:7535;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">塔，即造成了碰撞。</font></font></nobr> </div><div style="position:absolute;top:7552;left:483"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">最后，Tab。</font></font></nobr> <a href="#8"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">III</font></font></a><font 
style="vertical-align: inherit;"><font style="vertical-align: inherit;">报告区块的距离</font></font></div><div style="position:absolute;top:7570;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">时间T的抓手，平均超过10次测试运行（之后</font></font></nobr> </div><div style="position:absolute;top:7588;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">相应的控制器已经被训练）和四个</font></font></nobr> </div><div style="position:absolute;top:7606;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">不同的训练设置。</font><font style="vertical-align: inherit;">在时间T，夹子打开，</font></font></nobr> </div><div style="position:absolute;top:7624;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">掉块。</font><font style="vertical-align: inherit;">距离是独立测量的</font></font></nobr> </div><div style="position:absolute;top:7642;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">的碰撞。</font><font style="vertical-align: inherit;">在有限和无约束的规划</font></font></nobr> </div><div style="position:absolute;top:7659;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">学到的控制器把夹子里的块关闭了</font></font></nobr> </div><div style="position:absolute;top:7677;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">到目标位置。</font><font style="vertical-align: inherit;">请注意，表中的距离。</font></font></nobr> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><a href="#8">III</a></span> <a href="#8">III</a></span> </div><div style="position:absolute;top:7695;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">大约等于噪音水平（图像捕捉，图像</font></font></nobr> </div><div style="position:absolute;top:7713;left:468"> <nobr><font style="vertical-align: 
inherit;"><font style="vertical-align: inherit;">处理，机器人手臂）。</font><font style="vertical-align: inherit;">这里的结果并不表明这一点</font></font></nobr> </div><div style="position:absolute;top:7731;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">任何训练设置都会导致更好的“下落地点”</font></font></nobr> </div><div style="position:absolute;top:7749;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">平均。</font><font style="vertical-align: inherit;">但是，学习没有状态空间的限制</font></font></nobr> </div><div style="position:absolute;top:7767;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">开始显示提前一到两个阶段的改善</font></font></nobr> </div><div style="position:absolute;top:7785;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">基于避免碰撞的规划进行学习（不是</font></font></nobr> </div><div style="position:absolute;top:7803;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">在表中报道。</font></font></nobr> <a href="#8"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">III）</font></font></a><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">。</font></font></div><div style="position:absolute;top:7838;left:602"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>VI.</nobr></span> <nobr>VI。</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>D</nobr> <font style="font-size:9px">ISCUSSION</font></span> <nobr>D</nobr> <font style="font-size:9px">讨论</font></span> </div><div style="position:absolute;top:7860;left:483"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">P </font></font></nobr> <font style="font-size:9px"><font 
style="vertical-align: inherit;"><font style="vertical-align: inherit;">ILCO</font></font></font><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">不是最佳的控制，因为它只是找到一个</font></font></div><div style="position:absolute;top:7878;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">解决方案的任务。</font><font style="vertical-align: inherit;">没有全球性的保证</font></font></nobr> </div><div style="position:absolute;top:7896;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">最优：由于学习的优化问题</font></font></nobr> </div><div style="position:absolute;top:7914;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">策略参数是不是凸的，发现的解决方案</font></font></nobr> </div><div style="position:absolute;top:7932;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">总是只有局部最优。</font><font style="vertical-align: inherit;">这也是在有条件</font></font></nobr> </div><div style="position:absolute;top:7950;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">体验学习者接触到。</font></font></nobr> </div><div style="position:absolute;top:7967;left:483"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">P </font></font></nobr> <font style="font-size:9px"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">ILCO</font></font></font><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">利用到一个近似的解析梯度</font></font></div><div style="position:absolute;top:7985;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">预计复位J </font></font></nobr> <font style="font-size:8px"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">π</font></font></font><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">的间接政策搜索。</font><font style="vertical-align: 
inherit;">因此，</font></font><font style="font-size:9px"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">PILCO</font></font></font> </div><div style="position:absolute;top:8003;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">并不需要维持一个明确的价值函数模型，</font></font></nobr> </div><div style="position:absolute;top:8021;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">它不能很好地扩展到高维。</font><font style="vertical-align: inherit;">抽样</font></font></nobr> </div><div style="position:absolute;top:8039;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">估计所述政策梯度[ </font></font></nobr> <a href="#8"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">15]</font></font></a><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">是不必要的。</font></font></div><div style="position:absolute;top:8056;left:483"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">计算对于给定的策略计划所需的约一</font></font></nobr> </div><div style="position:absolute;top:8074;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">第二的计算时间。</font><font style="vertical-align: inherit;">学习政策要求itera-</font></font></nobr> </div><div style="position:absolute;top:8092;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">略去概率规划和更新的策略参数。</font></font></nobr> </div><div style="position:absolute;top:8110;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">确切的持续时间取决于GP训练集的大小。</font></font></nobr> </div><div style="position:absolute;top:8128;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">在本文的实验中，</font></font></nobr> <font style="font-size:9px"><font style="vertical-align: inherit;"><font style="vertical-align: 
inherit;">PILCO</font></font></font><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">需要一到</font></font></div><div style="position:absolute;top:8146;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">三分钟来学习给定动力学模型的策略。</font></font></nobr> </div><div style="position:absolute;top:8164;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">因此，数据效率是以更多的计算</font></font></nobr> </div><div style="position:absolute;top:8182;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">开销为代价的。</font><font style="vertical-align: inherit;">然而，应用策略（测试）是</font></font></nobr> </div><div style="position:absolute;top:8199;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">可实时执行的，因为它只需要一次简单的函数求值</font></font></nobr> </div><div style="position:absolute;top:8217;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">u </font></font></nobr> <font style="font-size:8px"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">t</font></font></font><font style="vertical-align: inherit;"><font style="vertical-align: inherit;"> =π（x </font></font><font style="font-size:8px"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">t</font></font></font><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">），这往往是一个矩阵-向量乘法。</font></font></div><div style="position:absolute;top:8235;left:483"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">原则上，没有什么阻止</font></font></nobr> <font style="font-size:9px"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">PILCO</font></font></font><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">从</font></font></div><div style="position:absolute;top:8253;left:468"> <nobr><font 
style="vertical-align: inherit;"><font style="vertical-align: inherit;">扩展到更高维的问题，请参见</font></font></nobr> <a href="#8"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[7</font></font></a><font style="vertical-align: inherit;"><font style="vertical-align: inherit;"> ]对于一些</font></font></div><div style="position:absolute;top:8271;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">例子。</font><font style="vertical-align: inherit;">政策评估和梯度计算规模</font></font></nobr> </div><div style="position:absolute;top:8288;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">立方体在状态尺度</font></font></nobr> <a href="#8"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[6</font></font></a><font style="vertical-align: inherit;"><font style="vertical-align: inherit;"> ]。</font><font style="vertical-align: inherit;">虽然政策搜索</font></font></div><div style="position:absolute;top:8306;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">仅缩放二次在GP培训的大小n</font></font></nobr> </div><div style="position:absolute;top:8324;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">将[ </font></font></nobr> <a href="#8"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">6] </font></font></a><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">，</font><font style="vertical-align: inherit;">这是</font></font><font style="font-size:9px"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">PILCO</font></font></font><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">的实际瓶颈。</font><font style="vertical-align: inherit;">因此，我们使用</font></font></div><div style="position:absolute;top:8342;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: 
inherit;">稀疏GP近似当n≥400这是快速</font></font></nobr> </div><div style="position:absolute;top:8360;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">超过如果底层动力学是复杂的和/或</font></font></nobr> </div><div style="position:absolute;top:8378;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">高的采样频率被使用。</font></font></nobr> </div><div style="position:absolute;top:8491;left:0"><hr><table border="0" width="100%"><tbody><tr><td bgcolor="eeeeee" align="right"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><font face="arial,sans-serif"><a name="8"><b>Page 8</b></a></font></span> <font face="arial,sans-serif"><a name="8"><b>第8页</b></a></font></span> </td></tr></tbody></table></div><div style="position:absolute;top:8572;left:432"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>TABLE III</nobr></span> <nobr>表三</nobr></span> </div><div style="position:absolute;top:8585;left:212"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">Ë </font></font></nobr> <font style="font-size:7px"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">XPERIMENTAL结果使用和未使用避碰规划</font></font></font><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">（</font></font><font style="font-size:7px"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">SETUP</font></font></font><font style="vertical-align: inherit;"><font style="vertical-align: inherit;"> 2）。</font></font></div><div style="position:absolute;top:8615;left:166"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">无防撞</font></font></nobr> </div><div style="position:absolute;top:8615;left:368"> <nobr><font style="vertical-align: 
inherit;"><font style="vertical-align: inherit;">B2</font></font></nobr> </div><div style="position:absolute;top:8615;left:456"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">B3</font></font></nobr> </div><div style="position:absolute;top:8615;left:548"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">B4</font></font></nobr> </div><div style="position:absolute;top:8615;left:636"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">B5</font></font></nobr> </div><div style="position:absolute;top:8615;left:723"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">B6</font></font></nobr> </div><div style="position:absolute;top:8629;left:173"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">训练过程中的碰撞</font></font></nobr> </div><div style="position:absolute;top:8629;left:343"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">12/40（30％）</font></font></nobr> </div><div style="position:absolute;top:8629;left:426"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">11/40（27.5％）</font></font></nobr> </div><div style="position:absolute;top:8629;left:518"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">13/40（32.5％）</font></font></nobr> </div><div style="position:absolute;top:8629;left:610"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">18/40（45％）</font></font></nobr> </div><div style="position:absolute;top:8629;left:693"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">21/40（52.5％）</font></font></nobr> </div><div style="position:absolute;top:8642;left:171"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">块存款成功率</font></font></nobr> </div><div style="position:absolute;top:8642;left:364"> 
<nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">50％</font></font></nobr> </div><div style="position:absolute;top:8642;left:451"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>43%</nobr></span> <nobr>43％</nobr></span> </div><div style="position:absolute;top:8642;left:544"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>37%</nobr></span> <nobr>37％</nobr></span> </div><div style="position:absolute;top:8642;left:631"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>47%</nobr></span> <nobr>47％</nobr></span> </div><div style="position:absolute;top:8642;left:718"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">33％</font></font></nobr> </div><div style="position:absolute;top:8656;left:146"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">距离（厘米）到目标在时刻T</font></font></nobr> </div><div style="position:absolute;top:8656;left:345"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">1.39 ± 0.81</font></font></nobr> </div><div style="position:absolute;top:8656;left:432"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">0.73 ± 0.36</font></font></nobr> </div><div style="position:absolute;top:8656;left:525"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">0.65 ± 0.35</font></font></nobr> </div><div style="position:absolute;top:8656;left:612"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">0.71 ± 0.46</font></font></nobr> </div><div style="position:absolute;top:8656;left:700"> <nobr><font style="vertical-align: inherit;"><font 
style="vertical-align: inherit;">0.59 ± 0.34</font></font></nobr> </div><div style="position:absolute;top:8673;left:174"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">有防撞</font></font></nobr> </div><div style="position:absolute;top:8673;left:368"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">B2</font></font></nobr> </div><div style="position:absolute;top:8673;left:456"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">B3</font></font></nobr> </div><div style="position:absolute;top:8673;left:548"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">B4</font></font></nobr> </div><div style="position:absolute;top:8673;left:636"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">B5</font></font></nobr> </div><div style="position:absolute;top:8673;left:723"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">B6</font></font></nobr> </div><div style="position:absolute;top:8687;left:173"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">训练过程中的碰撞</font></font></nobr> </div><div style="position:absolute;top:8687;left:349"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">0/40（0％）</font></font></nobr> </div><div style="position:absolute;top:8687;left:437"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">2/40（5％）</font></font></nobr> </div><div style="position:absolute;top:8687;left:524"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">1/40（2.5％）</font></font></nobr> </div><div style="position:absolute;top:8687;left:612"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">3/40（7.5％）</font></font></nobr> </div><div style="position:absolute;top:8687;left:699"> <nobr><font style="vertical-align: 
inherit;"><font style="vertical-align: inherit;">1/40（2.5％）</font></font></nobr> </div><div style="position:absolute;top:8700;left:171"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">块存款成功率</font></font></nobr> </div><div style="position:absolute;top:8700;left:364"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>90%</nobr></span> <nobr>90％</nobr></span> </div><div style="position:absolute;top:8700;left:451"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">97％</font></font></nobr> </div><div style="position:absolute;top:8700;left:544"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>90%</nobr></span> <nobr>90％</nobr></span> </div><div style="position:absolute;top:8700;left:631"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">70％</font></font></nobr> </div><div style="position:absolute;top:8700;left:718"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">97％</font></font></nobr> </div><div style="position:absolute;top:8714;left:146"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">距离（厘米）到目标在时刻T</font></font></nobr> </div><div style="position:absolute;top:8714;left:345"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">0.89 ± 0.80</font></font></nobr> </div><div style="position:absolute;top:8714;left:432"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">0.65 ± 0.33</font></font></nobr> </div><div style="position:absolute;top:8714;left:525"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">0.67 ± 0.46</font></font></nobr> </div><div style="position:absolute;top:8714;left:612"> 
<nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">0.80 ± 0.37</font></font></nobr> </div><div style="position:absolute;top:8714;left:700"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">1.34 ± 0.56</font></font></nobr> </div><div style="position:absolute;top:8756;left:201"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>VII.</nobr></span> <nobr>VII.</nobr></span> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>C</nobr> <font style="font-size:9px">ONCLUSION</font></span> <nobr>结论</nobr></span> </div><div style="position:absolute;top:8779;left:88"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">我们提出了一种数据高效、完全自主的方</font></font></nobr> </div><div style="position:absolute;top:8797;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">法来学习机器人控制，即使机器人系统</font></font></nobr> </div><div style="position:absolute;top:8815;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">非常不精确。</font><font style="vertical-align: inherit;">我们基于模型的策略搜索方法</font></font></nobr> </div><div style="position:absolute;top:8833;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">受益于用于策略评估的闭式近似推理</font></font></nobr> </div><div style="position:absolute;top:8851;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">和用于策略学习的解析梯度。</font><font style="vertical-align: inherit;">避免</font></font></nobr> </div><div style="position:absolute;top:8869;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">碰撞中，我们提出了以关于知识的方式</font></font></nobr> </div><div 
style="position:absolute;top:8887;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">规划中的障碍的环境考虑在内，并</font></font></nobr> </div><div style="position:absolute;top:8905;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">不确定条件下的控制。</font><font style="vertical-align: inherit;">此外，我们评估了</font></font></nobr> </div><div style="position:absolute;top:8923;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">在连续任务重用动力学模型的收益。</font></font></nobr> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>With</nobr></span> <nobr>同</nobr></span> </div><div style="position:absolute;top:8941;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">有关机器人和唯一非常普遍的现有知识</font></font></nobr> </div><div style="position:absolute;top:8958;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">任务需要学习，我们证明了良好的控制器，用于</font></font></nobr> </div><div style="position:absolute;top:8976;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">由廉价的机械手的低成本机器人系统</font></font></nobr> </div><div style="position:absolute;top:8994;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">和深度相机可以在短短的试验来学习。</font></font></nobr> </div><div style="position:absolute;top:9012;left:88"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">尽管我们目前系统的局限性，我们认为</font></font></nobr> </div><div style="position:absolute;top:9030;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">该整体框架可以容易地适用于处理</font></font></nobr> </div><div style="position:absolute;top:9048;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">更复杂的任务。</font><font 
style="vertical-align: inherit;">在今后的工作中，我们的目标是了解更多</font></font></nobr> </div><div style="position:absolute;top:9066;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">一般的控制器，可以处理任意位置开始</font></font></nobr> </div><div style="position:absolute;top:9084;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">的夹持器和目标栈。</font><font style="vertical-align: inherit;">这样的抓物</font></font></nobr> </div><div style="position:absolute;top:9102;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">便宜的操纵也是有前途的研究方向。</font></font></nobr> </div><div style="position:absolute;top:9131;left:189"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">一个</font></font></nobr> <font style="font-size:9px"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">CKNOWLEDGEMENTS</font></font></font> </div><div style="position:absolute;top:9155;left:88"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">MP Deisenroth和·福克斯已经由ONR支持</font></font></nobr> </div><div style="position:absolute;top:9173;left:73"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">穆里授予N00014-09-1-1052和英特尔实验室。</font></font></nobr> </div><div style="position:absolute;top:9202;left:220"> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>R</nobr> <font style="font-size:9px">EFERENCES</font></span> <nobr>R</nobr> <font style="font-size:9px">EFERENCES</font></span> </div><div style="position:absolute;top:9225;left:81"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[1] http://www.lynxmotion.com。</font></font></nobr> </div><div style="position:absolute;top:9243;left:81"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: 
inherit;">[2] http://www.primesense.com。</font></font></nobr> </div><div style="position:absolute;top:9261;left:81"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[3] P. Abbeel和AY伍。</font><font style="vertical-align: inherit;">探索和学徒</font></font></nobr> </div><div style="position:absolute;top:9279;left:106"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">学习强化学习。</font><font style="vertical-align: inherit;">在ICML，2005年。</font></font></nobr> </div><div style="position:absolute;top:9297;left:81"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[4] JA巴格内尔和JG施耐德。</font><font style="vertical-align: inherit;">自治他 - </font></font></nobr> </div><div style="position:absolute;top:9315;left:106"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">通过强化学习策略licopter控制</font></font></nobr> </div><div style="position:absolute;top:9333;left:106"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">搜索方法。</font><font style="vertical-align: inherit;">在ICRA，第1615-1620，2001年。</font></font></nobr> </div><div style="position:absolute;top:9351;left:81"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[5] B.靴，SM Siddiqi，和GJ戈登。</font><font style="vertical-align: inherit;">关闭</font></font></nobr> </div><div style="position:absolute;top:9369;left:106"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">学习规划环与预测国家代表中</font></font></nobr> </div><div style="position:absolute;top:9387;left:106"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">tations。</font><font style="vertical-align: inherit;">在R：SS，2010。</font></font></nobr> </div><div style="position:absolute;top:9404;left:81"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[6] MP Deisenroth。</font><font style="vertical-align: 
inherit;">高效的强化学习使用</font></font></nobr> </div><div style="position:absolute;top:9423;left:106"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">高斯过程。</font><font style="vertical-align: inherit;">KIT科学出版社，2010。</font></font></nobr> </div><div style="position:absolute;top:9440;left:106"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">ISBN 978-3-86644-569-7。</font></font></nobr> </div><div style="position:absolute;top:9458;left:81"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[7] MP Deisenroth和CE Rasmussen的。</font><font style="vertical-align: inherit;">PILCO：一</font></font></nobr> </div><div style="position:absolute;top:9476;left:106"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">基于模型和数据有效途径策略</font></font></nobr> </div><div style="position:absolute;top:9494;left:106"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">搜索。</font><font style="vertical-align: inherit;">在ICML，2011。</font></font></nobr> </div><div style="position:absolute;top:9512;left:81"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[8] MP Deisenroth，CE Rasmussen的，和J.彼得斯。</font><font style="vertical-align: inherit;">Gaus-</font></font></nobr> </div><div style="position:absolute;top:9530;left:106"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">仙过程动态规划。</font><font style="vertical-align: inherit;">神经计算，</font></font></nobr> </div><div style="position:absolute;top:9548;left:106"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">72（7-9）：1508至1524年，2009年。</font></font></nobr> </div><div style="position:absolute;top:8756;left:475"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[9] J.柯，DJ克莱恩，D. Fox和D. 
Haehnel。</font><font style="vertical-align: inherit;">高斯</font></font></nobr> </div><div style="position:absolute;top:8774;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">流程和强化学习的鉴定</font></font></nobr> </div><div style="position:absolute;top:8792;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">和自治飞艇的控制。</font><font style="vertical-align: inherit;">在ICRA，2007年。</font></font></nobr> </div><div style="position:absolute;top:8809;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[10] J. KO和D.福克斯。</font><font style="vertical-align: inherit;">通过高斯学习GP-BayesFilters</font></font></nobr> </div><div style="position:absolute;top:8827;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">处理潜变量模型。</font><font style="vertical-align: inherit;">自主机器人，</font></font></nobr> </div><div style="position:absolute;top:8845;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">30（1），2011。</font></font></nobr> </div><div style="position:absolute;top:8863;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[11] J.库贝尔和J.彼得斯。</font><font style="vertical-align: inherit;">对于汽车原语政策搜索</font></font></nobr> </div><div style="position:absolute;top:8881;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">在机器人。</font><font style="vertical-align: inherit;">机器学习，2011。</font></font></nobr> </div><div style="position:absolute;top:8899;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[12] J. Maitin-谢泼德，M.库苏马诺-陶纳，J.雷，和</font></font></nobr> </div><div style="position:absolute;top:8917;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">P. 
Abbeel。</font><font style="vertical-align: inherit;">基于多视图几何线索的布料抓取点</font></font></nobr> </div><div style="position:absolute;top:8935;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">检测及其在机器人毛巾折叠中的</font></font></nobr> </div><div style="position:absolute;top:8953;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">应用。</font><font style="vertical-align: inherit;">在ICRA，2010年。</font></font></nobr> </div><div style="position:absolute;top:8971;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[13] J. Nakanishi，R. Cory，M. Mistry，J. Peters和S. Schaal。</font></font></nobr> </div><div style="position:absolute;top:8989;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">操作空间控制：理论与实证</font></font></nobr> </div><div style="position:absolute;top:9007;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">比较。</font><font style="vertical-align: inherit;">IJRR，27（737），2008年6月。</font></font></nobr> </div><div style="position:absolute;top:9025;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[14] D. Nguyen-Tuong，M. Seeger和J. 
Peters。</font></font></nobr> <span class="notranslate" onmouseover="_tipon(this)" onmouseout="_tipoff()"><span class="google-src-text" style="direction: ltr; text-align: left"><nobr>Model</nobr></span> <nobr>模型</nobr></span> </div><div style="position:absolute;top:9043;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">学习局部高斯过程回归。</font><font style="vertical-align: inherit;">Ad-</font></font></nobr> </div><div style="position:absolute;top:9061;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">vanced Robotics，23（15）：2015-2034，2009。</font></font></nobr> </div><div style="position:absolute;top:9078;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[15] J. Peters和S. Schaal。</font><font style="vertical-align: inherit;">用于机器人的策略</font></font></nobr> </div><div style="position:absolute;top:9096;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">梯度方法。</font><font style="vertical-align: inherit;">在IROS，第2219-2225页，2006年。</font></font></nobr> </div><div style="position:absolute;top:9114;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[16] J. Pineau，G. Gordon和S. Thrun。</font><font style="vertical-align: inherit;">基于点的价值</font></font></nobr> </div><div style="position:absolute;top:9132;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">迭代：POMDP的任意时间算法。</font><font style="vertical-align: inherit;">在IJCAI，</font></font></nobr> </div><div style="position:absolute;top:9150;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">第1025-1030页，2003年。</font></font></nobr> </div><div style="position:absolute;top:9168;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[17] J. Quiñonero-Candela，A. Girard，J. 
Larsen和CE</font></font></nobr> </div><div style="position:absolute;top:9186;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">拉斯穆森。</font><font style="vertical-align: inherit;">贝叶斯不确定性的传播</font></font></nobr> </div><div style="position:absolute;top:9204;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">内核模型 - 应用于多步骤</font></font></nobr> </div><div style="position:absolute;top:9222;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">预测。</font><font style="vertical-align: inherit;">ICASSP，第701-704页，2003年。</font></font></nobr> </div><div style="position:absolute;top:9240;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[18] M. Quigley，R. Brewer，SP Soundararaj，V. Pradeep，</font></font></nobr> </div><div style="position:absolute;top:9258;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">问：乐和伍AY。</font><font style="vertical-align: inherit;">低成本加速计</font></font></nobr> </div><div style="position:absolute;top:9276;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">机器人手臂知觉。</font><font style="vertical-align: inherit;">在IROS，2010。</font></font></nobr> </div><div style="position:absolute;top:9294;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[19] M. Quigley的，B. Gerkey，K.康利，J.浮士德，T.富特</font></font></nobr> </div><div style="position:absolute;top:9312;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">J. 
Leibs，E.伯杰，R.惠勒，和伍AY。</font><font style="vertical-align: inherit;">ROS：一个</font></font></nobr> </div><div style="position:absolute;top:9330;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">开源机器人操作系统。</font><font style="vertical-align: inherit;">在ICRA开路</font></font></nobr> </div><div style="position:absolute;top:9348;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">开源软件研讨会，2009年。</font></font></nobr> </div><div style="position:absolute;top:9365;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[20] CE Rasmussen的和M.库斯。</font><font style="vertical-align: inherit;">高斯过程中</font></font></nobr> </div><div style="position:absolute;top:9383;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">强化学习。</font><font style="vertical-align: inherit;">在NIPS，第751-759，2004。</font></font></nobr> </div><div style="position:absolute;top:9401;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[21] CE拉斯穆森和威廉姆斯CKI。</font><font style="vertical-align: inherit;">高斯</font></font></nobr> </div><div style="position:absolute;top:9419;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">流程，机器学习。</font><font style="vertical-align: inherit;">麻省理工学院出版社，2006年。</font></font></nobr> </div><div style="position:absolute;top:9437;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[22] S. 
Schaal。</font><font style="vertical-align: inherit;">学习从示范。</font><font style="vertical-align: inherit;">在NIPS，页。</font></font></nobr> </div><div style="position:absolute;top:9455;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">1040-1046，1997年。</font></font></nobr> </div><div style="position:absolute;top:9473;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[23] JG施耐德。</font><font style="vertical-align: inherit;">利用模型不确定性估计</font></font></nobr> </div><div style="position:absolute;top:9491;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">为安全动态控制学习。</font><font style="vertical-align: inherit;">在NIPS，1997年。</font></font></nobr> </div><div style="position:absolute;top:9509;left:468"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">[24] M.图森特和C. Goerick。</font><font style="vertical-align: inherit;">从运动学习到</font></font></nobr> </div><div style="position:absolute;top:9527;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">在互动学习的机器人，贝叶斯查看章节</font></font></nobr> </div><div style="position:absolute;top:9545;left:500"> <nobr><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">在电机控制和规划。</font><font style="vertical-align: inherit;">施普林格出版社，2010。</font></font></nobr> </div><!-- t36487r93a48c32263e31790n380u14l0m0k0 -->
<script>
// Hand _addload a callback that calls _setupIW('com') and then _csi with
// the en -> zh-CN language pair and the URL-encoded source file name.
_addload(function () {
  _setupIW('com');
  _csi(
    'en',
    'zh-CN',
    '%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%2B%E6%9C%BA%E6%A2%B0%E8%87%82-Learning+to+Control+a+Low-Cost+Manipulator+using.pdf'
  );
});
</script><script type="text/javascript">
// gtElInit is the name passed as `cb` in the element.js URL further down
// this line. It creates a TranslateService and calls translatePage for
// 'en' -> 'zh-CN' with a completion callback that does nothing.
function gtElInit() {
  var translateService = new google.translate.TranslateService();
  var noop = function () {};
  translateService.translatePage('en', 'zh-CN', noop);
}
</script><div id="goog-gt-tt" class="skiptranslate" dir="ltr"><div style="padding: 8px;"><div><div class="logo"><img src="https://www.gstatic.com/images/branding/product/1x/translate_24dp.png" width="20" height="20" alt="Google 翻译"></div></div></div><div class="top" style="padding: 8px; float: left; width: 100%;"><h1 class="title gray">原文</h1></div><div class="middle" style="padding: 8px;"><div class="original-text"></div></div><div class="bottom" style="padding: 8px;"><div class="activity-links"><span class="activity-link">提供更好的翻译建议</span><span class="activity-link"></span></div><div class="started-activity-container"><hr style="color: #CCC; background-color: #CCC; height: 1px; border: none;"><div class="activity-root"></div></div></div><div class="status-message" style="display: none;"></div></div><script type="text/javascript" src="https://translate.google.com/translate_a/element.js?cb=gtElInit&amp;client=wt"></script><div class="goog-te-spinner-pos"><div class="goog-te-spinner-animation"><svg xmlns="http://www.w3.org/2000/svg" class="goog-te-spinner" width="96px" height="96px" viewBox="0 0 66 66"><circle class="goog-te-spinner-path" fill="none" stroke-width="6" stroke-linecap="round" cx="33" cy="33" r="30"></circle></svg></div></div><span><div class="gmnoprint" style="display: none; z-index: 2147483647;"><img src="https://www.gstatic.com/translate/infowindow/iws_n.png" style="position: absolute; left: 0px; top: 0px; width: 628px; height: 8px; border: 0px; padding: 0px; margin: 0px;"><img src="https://www.gstatic.com/translate/infowindow/iws_n.png" style="position: absolute; left: 0px; top: 0px; width: 628px; height: 
8px; border: 0px; padding: 0px; margin: 0px;"><img src="https://www.gstatic.com/translate/infowindow/iws_w.png" style="position: absolute; left: 0px; top: 0px; width: 8px; height: 598px; border: 0px; padding: 0px; margin: 0px;"><img src="https://www.gstatic.com/translate/infowindow/iws_e.png" style="position: absolute; left: 0px; top: 0px; width: 8px; height: 598px; border: 0px; padding: 0px; margin: 0px;"><img src="https://www.gstatic.com/translate/infowindow/iws_s.png" style="position: absolute; left: 0px; top: 0px; width: 628px; height: 8px; border: 0px; padding: 0px; margin: 0px;"><img src="https://www.gstatic.com/translate/infowindow/iws_s.png" style="position: absolute; left: 0px; top: 0px; width: 628px; height: 8px; border: 0px; padding: 0px; margin: 0px;"><img src="https://www.gstatic.com/translate/infowindow/iws_c.png" style="position: absolute; left: 0px; top: 0px; width: 628px; height: 598px; border: 0px; padding: 0px; margin: 0px;"><div class="SPRITE_iws_nw" style="overflow: hidden;"></div><div class="SPRITE_iws_ne" style="overflow: hidden;"></div><div class="SPRITE_iws_sw" style="overflow: hidden;"></div><div class="SPRITE_iws_se" style="overflow: hidden;"></div><div class="SPRITE_iws_tap" style="overflow: hidden;"></div><div class="SPRITE_iws_tap_l" style="overflow: hidden;"></div><div class="SPRITE_iws_tap_u" style="overflow: hidden;"></div><div class="SPRITE_iws_tap_ul" style="overflow: hidden;"></div><div class="SPRITE_iws_tap_rd" style="overflow: hidden;"></div><div class="SPRITE_iws_tap_ld" style="overflow: hidden;"></div><div class="SPRITE_iws_tab_dl" style="overflow: hidden; display: none;"></div><div class="SPRITE_iws_tab_dr" style="overflow: hidden; display: none;"></div><div class="SPRITE_iws_tab_l" style="overflow: hidden; display: none;"></div><div class="SPRITE_iws_tab_r" style="overflow: hidden; display: none;"></div></div></span><div id="google-infowindow" class="notranslate"><div class="gmnoprint" style="display: none; z-index: 
2147483647;"><img src="https://www.gstatic.com/translate/infowindow/iw_n.png" style="position: absolute; left: 0px; top: 0px; width: 628px; height: 6px; border: 0px; padding: 0px; margin: 0px;"><img src="https://www.gstatic.com/translate/infowindow/iw_n.png" style="position: absolute; left: 0px; top: 0px; width: 628px; height: 6px; border: 0px; padding: 0px; margin: 0px;"><img src="https://www.gstatic.com/translate/infowindow/iw_w.png" style="position: absolute; left: 0px; top: 0px; width: 6px; height: 598px; border: 0px; padding: 0px; margin: 0px;"><img src="https://www.gstatic.com/translate/infowindow/iw_e.png" style="position: absolute; left: 0px; top: 0px; width: 6px; height: 598px; border: 0px; padding: 0px; margin: 0px;"><img src="https://www.gstatic.com/translate/infowindow/iw_s0.png" style="position: absolute; left: 0px; top: 0px; width: 628px; height: 6px; border: 0px; padding: 0px; margin: 0px;"><img src="https://www.gstatic.com/translate/infowindow/iw_s0.png" style="position: absolute; left: 0px; top: 0px; width: 628px; height: 6px; border: 0px; padding: 0px; margin: 0px;"><img src="https://www.gstatic.com/translate/infowindow/iw_c.png" style="position: absolute; left: 0px; top: 0px; width: 628px; height: 598px; border: 0px; padding: 0px; margin: 0px;"><div class="SPRITE_iw_nw" style="overflow: hidden;"></div><div class="SPRITE_iw_ne" style="overflow: hidden;"></div><div class="SPRITE_iw_xtap" style="overflow: hidden;"></div><div class="SPRITE_iw_xtap_l" style="overflow: hidden;"></div><div class="SPRITE_iw_xtap_u" style="overflow: hidden;"></div><div class="SPRITE_iw_xtap_ul" style="overflow: hidden;"></div><div class="SPRITE_iw_xtap_rd" style="overflow: hidden;"></div><div class="SPRITE_iw_xtap_ld" style="overflow: hidden;"></div><div class="SPRITE_iw_sw0" style="overflow: hidden;"></div><div class="SPRITE_iw_se0" style="overflow: hidden;"></div><div class="SPRITE_close" style="overflow: hidden; z-index: 10000; cursor: pointer; visibility: 
visible;"></div><div class="SPRITE_maximize" style="overflow: hidden; z-index: 10000; visibility: hidden; cursor: pointer; display: none;"></div><div class="SPRITE_restore" style="overflow: hidden; z-index: 10001; visibility: hidden; cursor: pointer; display: none;"></div></div></div><div style="display: none;"><iframe id="google-feedback-frame" name="" title="Google 翻译反馈"></iframe></div></body></html>